Diffstat (limited to 'src')
425 files changed, 49339 insertions, 27155 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f8ec8fea8..6e66dc1df 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -142,6 +142,7 @@ add_subdirectory(core) add_subdirectory(audio_core) add_subdirectory(video_core) add_subdirectory(input_common) +add_subdirectory(shader_recompiler) add_subdirectory(tests) if (ENABLE_SDL2) diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 7f22ea97a..57922b51c 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -1,8 +1,3 @@ -# Add a custom command to generate a new shader_cache_version hash when any of the following files change -# NOTE: This is an approximation of what files affect shader generation, its possible something else -# could affect the result, but much more unlikely than the following files. Keeping a list of files -# like this allows for much better caching since it doesn't force the user to recompile binary shaders every update -set(VIDEO_CORE "${CMAKE_SOURCE_DIR}/src/video_core") if (DEFINED ENV{AZURECIREPO}) set(BUILD_REPOSITORY $ENV{AZURECIREPO}) endif() @@ -30,64 +25,7 @@ add_custom_command(OUTPUT scm_rev.cpp -DGIT_EXECUTABLE=${GIT_EXECUTABLE} -P ${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake DEPENDS - # WARNING! It was too much work to try and make a common location for this list, - # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well - "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h" - "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" - "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h" - "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h" - "${VIDEO_CORE}/shader/decode/arithmetic.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_immediate.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_integer.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_integer_immediate.cpp" - "${VIDEO_CORE}/shader/decode/bfe.cpp" - "${VIDEO_CORE}/shader/decode/bfi.cpp" - "${VIDEO_CORE}/shader/decode/conversion.cpp" - "${VIDEO_CORE}/shader/decode/ffma.cpp" - "${VIDEO_CORE}/shader/decode/float_set.cpp" - "${VIDEO_CORE}/shader/decode/float_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/half_set.cpp" - "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/hfma2.cpp" - "${VIDEO_CORE}/shader/decode/image.cpp" - "${VIDEO_CORE}/shader/decode/integer_set.cpp" - "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/memory.cpp" - "${VIDEO_CORE}/shader/decode/texture.cpp" - "${VIDEO_CORE}/shader/decode/other.cpp" - "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp" - "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/shift.cpp" - "${VIDEO_CORE}/shader/decode/video.cpp" - "${VIDEO_CORE}/shader/decode/warp.cpp" - "${VIDEO_CORE}/shader/decode/xmad.cpp" - "${VIDEO_CORE}/shader/ast.cpp" - "${VIDEO_CORE}/shader/ast.h" - "${VIDEO_CORE}/shader/compiler_settings.cpp" - "${VIDEO_CORE}/shader/compiler_settings.h" - "${VIDEO_CORE}/shader/control_flow.cpp" - "${VIDEO_CORE}/shader/control_flow.h" - "${VIDEO_CORE}/shader/decode.cpp" - "${VIDEO_CORE}/shader/expr.cpp" - 
"${VIDEO_CORE}/shader/expr.h" - "${VIDEO_CORE}/shader/node.h" - "${VIDEO_CORE}/shader/node_helper.cpp" - "${VIDEO_CORE}/shader/node_helper.h" - "${VIDEO_CORE}/shader/registry.cpp" - "${VIDEO_CORE}/shader/registry.h" - "${VIDEO_CORE}/shader/shader_ir.cpp" - "${VIDEO_CORE}/shader/shader_ir.h" - "${VIDEO_CORE}/shader/track.cpp" - "${VIDEO_CORE}/shader/transform_feedback.cpp" - "${VIDEO_CORE}/shader/transform_feedback.h" - # and also check that the scm_rev files haven't changed + # Check that the scm_rev files haven't changed "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h" # technically we should regenerate if the git version changed, but its not worth the effort imo diff --git a/src/common/logging/filter.cpp b/src/common/logging/filter.cpp index 4f2cc29e1..f055f0e11 100644 --- a/src/common/logging/filter.cpp +++ b/src/common/logging/filter.cpp @@ -144,6 +144,10 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) { SUB(Render, Software) \ SUB(Render, OpenGL) \ SUB(Render, Vulkan) \ + CLS(Shader) \ + SUB(Shader, SPIRV) \ + SUB(Shader, GLASM) \ + SUB(Shader, GLSL) \ CLS(Audio) \ SUB(Audio, DSP) \ SUB(Audio, Sink) \ diff --git a/src/common/logging/types.h b/src/common/logging/types.h index 88b0e9c01..7ad0334fc 100644 --- a/src/common/logging/types.h +++ b/src/common/logging/types.h @@ -114,6 +114,10 @@ enum class Class : u8 { Render_Software, ///< Software renderer backend Render_OpenGL, ///< OpenGL backend Render_Vulkan, ///< Vulkan backend + Shader, ///< Shader recompiler + Shader_SPIRV, ///< Shader SPIR-V code generation + Shader_GLASM, ///< Shader GLASM code generation + Shader_GLSL, ///< Shader GLSL code generation Audio, ///< Audio emulation Audio_DSP, ///< The HLE implementation of the DSP Audio_Sink, ///< Emulator audio output backend diff --git a/src/common/scm_rev.cpp.in b/src/common/scm_rev.cpp.in index 5f126f324..cc88994c6 100644 --- a/src/common/scm_rev.cpp.in +++ b/src/common/scm_rev.cpp.in @@ -14,7 +14,6 @@ #define BUILD_ID "@BUILD_ID@" #define TITLE_BAR_FORMAT_IDLE "@TITLE_BAR_FORMAT_IDLE@" #define TITLE_BAR_FORMAT_RUNNING "@TITLE_BAR_FORMAT_RUNNING@" -#define SHADER_CACHE_VERSION "@SHADER_CACHE_VERSION@" namespace Common { @@ -28,7 +27,6 @@ const char g_build_version[] = BUILD_VERSION; const char g_build_id[] = BUILD_ID; const char g_title_bar_format_idle[] = TITLE_BAR_FORMAT_IDLE; const char g_title_bar_format_running[] = TITLE_BAR_FORMAT_RUNNING; -const char g_shader_cache_version[] = SHADER_CACHE_VERSION; } // namespace diff --git a/src/common/settings.cpp b/src/common/settings.cpp index bf5514386..66268ea0f 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -57,7 +57,7 @@ void LogSettings() { log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue()); log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue()); log_setting("Renderer_UseVsync", values.use_vsync.GetValue()); - log_setting("Renderer_UseAssemblyShaders", values.use_assembly_shaders.GetValue()); + log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue()); log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue()); log_setting("Renderer_UseGarbageCollection", values.use_caches_gc.GetValue()); log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue()); @@ -140,7 +140,7 @@ void RestoreGlobalState(bool is_powered_on) { values.use_nvdec_emulation.SetGlobal(true); values.accelerate_astc.SetGlobal(true); values.use_vsync.SetGlobal(true); - 
values.use_assembly_shaders.SetGlobal(true); + values.shader_backend.SetGlobal(true); values.use_asynchronous_shaders.SetGlobal(true); values.use_fast_gpu_time.SetGlobal(true); values.use_caches_gc.SetGlobal(true); diff --git a/src/common/settings.h b/src/common/settings.h index ce1bc647d..32dfb1d9f 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -24,6 +24,12 @@ enum class RendererBackend : u32 { Vulkan = 1, }; +enum class ShaderBackend : u32 { + GLSL = 0, + GLASM = 1, + SPIRV = 2, +}; + enum class GPUAccuracy : u32 { Normal = 0, High = 1, @@ -308,6 +314,9 @@ struct Values { // Renderer Setting<RendererBackend> renderer_backend{RendererBackend::OpenGL, "backend"}; BasicSetting<bool> renderer_debug{false, "debug"}; + BasicSetting<bool> enable_nsight_aftermath{false, "nsight_aftermath"}; + BasicSetting<bool> disable_shader_loop_safety_checks{false, + "disable_shader_loop_safety_checks"}; Setting<int> vulkan_device{0, "vulkan_device"}; Setting<u16> resolution_factor{1, "resolution_factor"}; @@ -331,7 +340,7 @@ struct Values { Setting<bool> accelerate_astc{true, "accelerate_astc"}; Setting<bool> use_vsync{true, "use_vsync"}; BasicSetting<bool> disable_fps_limit{false, "disable_fps_limit"}; - Setting<bool> use_assembly_shaders{false, "use_assembly_shaders"}; + Setting<ShaderBackend> shader_backend{ShaderBackend::GLASM, "shader_backend"}; Setting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"}; Setting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"}; Setting<bool> use_caches_gc{false, "use_caches_gc"}; diff --git a/src/common/thread_worker.h b/src/common/thread_worker.h index 8272985ff..cd0017726 100644 --- a/src/common/thread_worker.h +++ b/src/common/thread_worker.h @@ -5,6 +5,7 @@ #pragma once #include <atomic> +#include <condition_variable> #include <functional> #include <mutex> #include <stop_token> @@ -39,7 +40,7 @@ public: const auto lambda = [this, func](std::stop_token stop_token) { Common::SetCurrentThreadName(thread_name.c_str()); { - std::conditional_t<with_state, StateType, int> state{func()}; + [[maybe_unused]] std::conditional_t<with_state, StateType, int> state{func()}; while (!stop_token.stop_requested()) { Task task; { diff --git a/src/core/reporter.cpp b/src/core/reporter.cpp index cfaf50105..365b8f906 100644 --- a/src/core/reporter.cpp +++ b/src/core/reporter.cpp @@ -62,7 +62,6 @@ json GetYuzuVersionData() { {"build_date", std::string(Common::g_build_date)}, {"build_fullname", std::string(Common::g_build_fullname)}, {"build_version", std::string(Common::g_build_version)}, - {"shader_cache_version", std::string(Common::g_shader_cache_version)}, }; } diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index 066cb23e4..422de3a7d 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp @@ -233,8 +233,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader, Settings::values.use_nvdec_emulation.GetValue()); AddField(field_type, "Renderer_AccelerateASTC", Settings::values.accelerate_astc.GetValue()); AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue()); - AddField(field_type, "Renderer_UseAssemblyShaders", - Settings::values.use_assembly_shaders.GetValue()); + AddField(field_type, "Renderer_ShaderBackend", + static_cast<u32>(Settings::values.shader_backend.GetValue())); AddField(field_type, "Renderer_UseAsynchronousShaders", Settings::values.use_asynchronous_shaders.GetValue()); AddField(field_type, "System_UseDockedMode", 
Settings::values.use_docked_mode.GetValue()); diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt new file mode 100644 index 000000000..b5b7e5e83 --- /dev/null +++ b/src/shader_recompiler/CMakeLists.txt @@ -0,0 +1,268 @@ +add_library(shader_recompiler STATIC + backend/bindings.h + backend/glasm/emit_context.cpp + backend/glasm/emit_context.h + backend/glasm/emit_glasm.cpp + backend/glasm/emit_glasm.h + backend/glasm/emit_glasm_barriers.cpp + backend/glasm/emit_glasm_bitwise_conversion.cpp + backend/glasm/emit_glasm_composite.cpp + backend/glasm/emit_glasm_context_get_set.cpp + backend/glasm/emit_glasm_control_flow.cpp + backend/glasm/emit_glasm_convert.cpp + backend/glasm/emit_glasm_floating_point.cpp + backend/glasm/emit_glasm_image.cpp + backend/glasm/emit_glasm_instructions.h + backend/glasm/emit_glasm_integer.cpp + backend/glasm/emit_glasm_logical.cpp + backend/glasm/emit_glasm_memory.cpp + backend/glasm/emit_glasm_not_implemented.cpp + backend/glasm/emit_glasm_select.cpp + backend/glasm/emit_glasm_shared_memory.cpp + backend/glasm/emit_glasm_special.cpp + backend/glasm/emit_glasm_undefined.cpp + backend/glasm/emit_glasm_warp.cpp + backend/glasm/reg_alloc.cpp + backend/glasm/reg_alloc.h + backend/glsl/emit_context.cpp + backend/glsl/emit_context.h + backend/glsl/emit_glsl.cpp + backend/glsl/emit_glsl.h + backend/glsl/emit_glsl_atomic.cpp + backend/glsl/emit_glsl_barriers.cpp + backend/glsl/emit_glsl_bitwise_conversion.cpp + backend/glsl/emit_glsl_composite.cpp + backend/glsl/emit_glsl_context_get_set.cpp + backend/glsl/emit_glsl_control_flow.cpp + backend/glsl/emit_glsl_convert.cpp + backend/glsl/emit_glsl_floating_point.cpp + backend/glsl/emit_glsl_image.cpp + backend/glsl/emit_glsl_instructions.h + backend/glsl/emit_glsl_integer.cpp + backend/glsl/emit_glsl_logical.cpp + backend/glsl/emit_glsl_memory.cpp + backend/glsl/emit_glsl_not_implemented.cpp + backend/glsl/emit_glsl_select.cpp + backend/glsl/emit_glsl_shared_memory.cpp + backend/glsl/emit_glsl_special.cpp + backend/glsl/emit_glsl_undefined.cpp + backend/glsl/emit_glsl_warp.cpp + backend/glsl/var_alloc.cpp + backend/glsl/var_alloc.h + backend/spirv/emit_context.cpp + backend/spirv/emit_context.h + backend/spirv/emit_spirv.cpp + backend/spirv/emit_spirv.h + backend/spirv/emit_spirv_atomic.cpp + backend/spirv/emit_spirv_barriers.cpp + backend/spirv/emit_spirv_bitwise_conversion.cpp + backend/spirv/emit_spirv_composite.cpp + backend/spirv/emit_spirv_context_get_set.cpp + backend/spirv/emit_spirv_control_flow.cpp + backend/spirv/emit_spirv_convert.cpp + backend/spirv/emit_spirv_floating_point.cpp + backend/spirv/emit_spirv_image.cpp + backend/spirv/emit_spirv_image_atomic.cpp + backend/spirv/emit_spirv_instructions.h + backend/spirv/emit_spirv_integer.cpp + backend/spirv/emit_spirv_logical.cpp + backend/spirv/emit_spirv_memory.cpp + backend/spirv/emit_spirv_select.cpp + backend/spirv/emit_spirv_shared_memory.cpp + backend/spirv/emit_spirv_special.cpp + backend/spirv/emit_spirv_undefined.cpp + backend/spirv/emit_spirv_warp.cpp + environment.h + exception.h + frontend/ir/abstract_syntax_list.h + frontend/ir/attribute.cpp + frontend/ir/attribute.h + frontend/ir/basic_block.cpp + frontend/ir/basic_block.h + frontend/ir/breadth_first_search.h + frontend/ir/condition.cpp + frontend/ir/condition.h + frontend/ir/flow_test.cpp + frontend/ir/flow_test.h + frontend/ir/ir_emitter.cpp + frontend/ir/ir_emitter.h + frontend/ir/microinstruction.cpp + frontend/ir/modifiers.h + frontend/ir/opcodes.cpp + 
frontend/ir/opcodes.h + frontend/ir/opcodes.inc + frontend/ir/patch.cpp + frontend/ir/patch.h + frontend/ir/post_order.cpp + frontend/ir/post_order.h + frontend/ir/pred.h + frontend/ir/program.cpp + frontend/ir/program.h + frontend/ir/reg.h + frontend/ir/type.cpp + frontend/ir/type.h + frontend/ir/value.cpp + frontend/ir/value.h + frontend/maxwell/control_flow.cpp + frontend/maxwell/control_flow.h + frontend/maxwell/decode.cpp + frontend/maxwell/decode.h + frontend/maxwell/indirect_branch_table_track.cpp + frontend/maxwell/indirect_branch_table_track.h + frontend/maxwell/instruction.h + frontend/maxwell/location.h + frontend/maxwell/maxwell.inc + frontend/maxwell/opcodes.cpp + frontend/maxwell/opcodes.h + frontend/maxwell/structured_control_flow.cpp + frontend/maxwell/structured_control_flow.h + frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp + frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp + frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp + frontend/maxwell/translate/impl/barrier_operations.cpp + frontend/maxwell/translate/impl/bitfield_extract.cpp + frontend/maxwell/translate/impl/bitfield_insert.cpp + frontend/maxwell/translate/impl/branch_indirect.cpp + frontend/maxwell/translate/impl/common_encoding.h + frontend/maxwell/translate/impl/common_funcs.cpp + frontend/maxwell/translate/impl/common_funcs.h + frontend/maxwell/translate/impl/condition_code_set.cpp + frontend/maxwell/translate/impl/double_add.cpp + frontend/maxwell/translate/impl/double_compare_and_set.cpp + frontend/maxwell/translate/impl/double_fused_multiply_add.cpp + frontend/maxwell/translate/impl/double_min_max.cpp + frontend/maxwell/translate/impl/double_multiply.cpp + frontend/maxwell/translate/impl/double_set_predicate.cpp + frontend/maxwell/translate/impl/exit_program.cpp + frontend/maxwell/translate/impl/find_leading_one.cpp + frontend/maxwell/translate/impl/floating_point_add.cpp + frontend/maxwell/translate/impl/floating_point_compare.cpp + frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp + frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp + frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp + frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp + frontend/maxwell/translate/impl/floating_point_min_max.cpp + frontend/maxwell/translate/impl/floating_point_multi_function.cpp + frontend/maxwell/translate/impl/floating_point_multiply.cpp + frontend/maxwell/translate/impl/floating_point_range_reduction.cpp + frontend/maxwell/translate/impl/floating_point_set_predicate.cpp + frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp + frontend/maxwell/translate/impl/half_floating_point_add.cpp + frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp + frontend/maxwell/translate/impl/half_floating_point_helper.cpp + frontend/maxwell/translate/impl/half_floating_point_helper.h + frontend/maxwell/translate/impl/half_floating_point_multiply.cpp + frontend/maxwell/translate/impl/half_floating_point_set.cpp + frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp + frontend/maxwell/translate/impl/impl.cpp + frontend/maxwell/translate/impl/impl.h + frontend/maxwell/translate/impl/integer_add.cpp + frontend/maxwell/translate/impl/integer_add_three_input.cpp + frontend/maxwell/translate/impl/integer_compare.cpp + frontend/maxwell/translate/impl/integer_compare_and_set.cpp + frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp + 
frontend/maxwell/translate/impl/integer_funnel_shift.cpp + frontend/maxwell/translate/impl/integer_minimum_maximum.cpp + frontend/maxwell/translate/impl/integer_popcount.cpp + frontend/maxwell/translate/impl/integer_scaled_add.cpp + frontend/maxwell/translate/impl/integer_set_predicate.cpp + frontend/maxwell/translate/impl/integer_shift_left.cpp + frontend/maxwell/translate/impl/integer_shift_right.cpp + frontend/maxwell/translate/impl/integer_short_multiply_add.cpp + frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp + frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp + frontend/maxwell/translate/impl/load_constant.cpp + frontend/maxwell/translate/impl/load_constant.h + frontend/maxwell/translate/impl/load_effective_address.cpp + frontend/maxwell/translate/impl/load_store_attribute.cpp + frontend/maxwell/translate/impl/load_store_local_shared.cpp + frontend/maxwell/translate/impl/load_store_memory.cpp + frontend/maxwell/translate/impl/logic_operation.cpp + frontend/maxwell/translate/impl/logic_operation_three_input.cpp + frontend/maxwell/translate/impl/move_predicate_to_register.cpp + frontend/maxwell/translate/impl/move_register.cpp + frontend/maxwell/translate/impl/move_register_to_predicate.cpp + frontend/maxwell/translate/impl/move_special_register.cpp + frontend/maxwell/translate/impl/not_implemented.cpp + frontend/maxwell/translate/impl/output_geometry.cpp + frontend/maxwell/translate/impl/pixel_load.cpp + frontend/maxwell/translate/impl/predicate_set_predicate.cpp + frontend/maxwell/translate/impl/predicate_set_register.cpp + frontend/maxwell/translate/impl/select_source_with_predicate.cpp + frontend/maxwell/translate/impl/surface_atomic_operations.cpp + frontend/maxwell/translate/impl/surface_load_store.cpp + frontend/maxwell/translate/impl/texture_fetch.cpp + frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp + frontend/maxwell/translate/impl/texture_gather.cpp + frontend/maxwell/translate/impl/texture_gather_swizzled.cpp + frontend/maxwell/translate/impl/texture_gradient.cpp + frontend/maxwell/translate/impl/texture_load.cpp + frontend/maxwell/translate/impl/texture_load_swizzled.cpp + frontend/maxwell/translate/impl/texture_mipmap_level.cpp + frontend/maxwell/translate/impl/texture_query.cpp + frontend/maxwell/translate/impl/video_helper.cpp + frontend/maxwell/translate/impl/video_helper.h + frontend/maxwell/translate/impl/video_minimum_maximum.cpp + frontend/maxwell/translate/impl/video_multiply_add.cpp + frontend/maxwell/translate/impl/video_set_predicate.cpp + frontend/maxwell/translate/impl/vote.cpp + frontend/maxwell/translate/impl/warp_shuffle.cpp + frontend/maxwell/translate/translate.cpp + frontend/maxwell/translate/translate.h + frontend/maxwell/translate_program.cpp + frontend/maxwell/translate_program.h + host_translate_info.h + ir_opt/collect_shader_info_pass.cpp + ir_opt/constant_propagation_pass.cpp + ir_opt/dead_code_elimination_pass.cpp + ir_opt/dual_vertex_pass.cpp + ir_opt/global_memory_to_storage_buffer_pass.cpp + ir_opt/identity_removal_pass.cpp + ir_opt/lower_fp16_to_fp32.cpp + ir_opt/lower_int64_to_int32.cpp + ir_opt/passes.h + ir_opt/ssa_rewrite_pass.cpp + ir_opt/texture_pass.cpp + ir_opt/verification_pass.cpp + object_pool.h + profile.h + program_header.h + runtime_info.h + shader_info.h + varying_state.h +) + +target_link_libraries(shader_recompiler PUBLIC common fmt::fmt sirit) + +if (MSVC) + target_compile_options(shader_recompiler PRIVATE + /W4 + /WX + /we4018 # 'expression' : signed/unsigned mismatch + 
/we4244 # 'argument' : conversion from 'type1' to 'type2', possible loss of data (floating-point) + /we4245 # 'conversion' : conversion from 'type1' to 'type2', signed/unsigned mismatch + /we4254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data + /we4267 # 'var' : conversion from 'size_t' to 'type', possible loss of data + /we4305 # 'context' : truncation from 'type1' to 'type2' + /we4800 # Implicit conversion from 'type' to bool. Possible information loss + /we4826 # Conversion from 'type1' to 'type2' is sign-extended. This may cause unexpected runtime behavior. + ) +else() + target_compile_options(shader_recompiler PRIVATE + -Werror + -Werror=conversion + -Werror=ignored-qualifiers + -Werror=implicit-fallthrough + -Werror=shadow + -Werror=sign-compare + $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter> + $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable> + -Werror=unused-variable + + # Bracket depth determines maximum size of a fold expression in Clang since 9c9974c3ccb6. + # And this in turns limits the size of a std::array. + $<$<CXX_COMPILER_ID:Clang>:-fbracket-depth=1024> + ) +endif() + +create_target_directory_groups(shader_recompiler) diff --git a/src/shader_recompiler/backend/bindings.h b/src/shader_recompiler/backend/bindings.h new file mode 100644 index 000000000..35503000c --- /dev/null +++ b/src/shader_recompiler/backend/bindings.h @@ -0,0 +1,19 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" + +namespace Shader::Backend { + +struct Bindings { + u32 unified{}; + u32 uniform_buffer{}; + u32 storage_buffer{}; + u32 texture{}; + u32 image{}; +}; + +} // namespace Shader::Backend diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp new file mode 100644 index 000000000..069c019ad --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -0,0 +1,154 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <string_view> + +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" + +namespace Shader::Backend::GLASM { +namespace { +std::string_view InterpDecorator(Interpolation interp) { + switch (interp) { + case Interpolation::Smooth: + return ""; + case Interpolation::Flat: + return "FLAT "; + case Interpolation::NoPerspective: + return "NOPERSPECTIVE "; + } + throw InvalidArgument("Invalid interpolation {}", interp); +} + +bool IsInputArray(Stage stage) { + return stage == Stage::Geometry || stage == Stage::TessellationControl || + stage == Stage::TessellationEval; +} +} // Anonymous namespace + +EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, + const RuntimeInfo& runtime_info_) + : info{program.info}, profile{profile_}, runtime_info{runtime_info_} { + // FIXME: Temporary partial implementation + u32 cbuf_index{}; + for (const auto& desc : info.constant_buffer_descriptors) { + if (desc.count != 1) { + throw NotImplementedException("Constant buffer descriptor array"); + } + Add("CBUFFER c{}[]={{program.buffer[{}]}};", desc.index, cbuf_index); + ++cbuf_index; + } + u32 ssbo_index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + if (desc.count != 1) { + throw NotImplementedException("Storage buffer descriptor array"); + } + if (runtime_info.glasm_use_storage_buffers) { + Add("STORAGE ssbo{}[]={{program.storage[{}]}};", ssbo_index, bindings.storage_buffer); + ++bindings.storage_buffer; + ++ssbo_index; + } + } + if (!runtime_info.glasm_use_storage_buffers) { + if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) { + Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1); + } + } + stage = program.stage; + switch (program.stage) { + case Stage::VertexA: + case Stage::VertexB: + stage_name = "vertex"; + attrib_name = "vertex"; + break; + case Stage::TessellationControl: + case Stage::TessellationEval: + stage_name = "primitive"; + attrib_name = "primitive"; + break; + case Stage::Geometry: + stage_name = "primitive"; + attrib_name = "vertex"; + break; + case Stage::Fragment: + stage_name = "fragment"; + attrib_name = "fragment"; + break; + case Stage::Compute: + stage_name = "invocation"; + break; + } + const std::string_view attr_stage{stage == Stage::Fragment ? 
"fragment" : "vertex"}; + const VaryingState loads{info.loads.mask | info.passthrough.mask}; + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (loads.Generic(index)) { + Add("{}ATTRIB in_attr{}[]={{{}.attrib[{}..{}]}};", + InterpDecorator(info.interpolation[index]), index, attr_stage, index, index); + } + } + if (IsInputArray(stage) && loads.AnyComponent(IR::Attribute::PositionX)) { + Add("ATTRIB vertex_position=vertex.position;"); + } + if (info.uses_invocation_id) { + Add("ATTRIB primitive_invocation=primitive.invocation;"); + } + if (info.stores_tess_level_outer) { + Add("OUTPUT result_patch_tessouter[]={{result.patch.tessouter[0..3]}};"); + } + if (info.stores_tess_level_inner) { + Add("OUTPUT result_patch_tessinner[]={{result.patch.tessinner[0..1]}};"); + } + if (info.stores.ClipDistances()) { + Add("OUTPUT result_clip[]={{result.clip[0..7]}};"); + } + for (size_t index = 0; index < info.uses_patches.size(); ++index) { + if (!info.uses_patches[index]) { + continue; + } + if (stage == Stage::TessellationControl) { + Add("OUTPUT result_patch_attrib{}[]={{result.patch.attrib[{}..{}]}};" + "ATTRIB primitive_out_patch_attrib{}[]={{primitive.out.patch.attrib[{}..{}]}};", + index, index, index, index, index, index); + } else { + Add("ATTRIB primitive_patch_attrib{}[]={{primitive.patch.attrib[{}..{}]}};", index, + index, index); + } + } + if (stage == Stage::Fragment) { + Add("OUTPUT frag_color0=result.color;"); + for (size_t index = 1; index < info.stores_frag_color.size(); ++index) { + Add("OUTPUT frag_color{}=result.color[{}];", index, index); + } + } + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.stores.Generic(index)) { + Add("OUTPUT out_attr{}[]={{result.attrib[{}..{}]}};", index, index, index); + } + } + image_buffer_bindings.reserve(info.image_buffer_descriptors.size()); + for (const auto& desc : info.image_buffer_descriptors) { + image_buffer_bindings.push_back(bindings.image); + bindings.image += desc.count; + } + image_bindings.reserve(info.image_descriptors.size()); + for (const auto& desc : info.image_descriptors) { + image_bindings.push_back(bindings.image); + bindings.image += desc.count; + } + texture_buffer_bindings.reserve(info.texture_buffer_descriptors.size()); + for (const auto& desc : info.texture_buffer_descriptors) { + texture_buffer_bindings.push_back(bindings.texture); + bindings.texture += desc.count; + } + texture_bindings.reserve(info.texture_descriptors.size()); + for (const auto& desc : info.texture_descriptors) { + texture_bindings.push_back(bindings.texture); + bindings.texture += desc.count; + } +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h new file mode 100644 index 000000000..8433e5c00 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_context.h @@ -0,0 +1,80 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <string> +#include <utility> +#include <vector> + +#include <fmt/format.h> + +#include "shader_recompiler/backend/glasm/reg_alloc.h" +#include "shader_recompiler/stage.h" + +namespace Shader { +struct Info; +struct Profile; +struct RuntimeInfo; +} // namespace Shader + +namespace Shader::Backend { +struct Bindings; +} + +namespace Shader::IR { +class Inst; +struct Program; +} // namespace Shader::IR + +namespace Shader::Backend::GLASM { + +class EmitContext { +public: + explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, + const RuntimeInfo& runtime_info_); + + template <typename... Args> + void Add(const char* format_str, IR::Inst& inst, Args&&... args) { + code += fmt::format(fmt::runtime(format_str), reg_alloc.Define(inst), + std::forward<Args>(args)...); + // TODO: Remove this + code += '\n'; + } + + template <typename... Args> + void LongAdd(const char* format_str, IR::Inst& inst, Args&&... args) { + code += fmt::format(fmt::runtime(format_str), reg_alloc.LongDefine(inst), + std::forward<Args>(args)...); + // TODO: Remove this + code += '\n'; + } + + template <typename... Args> + void Add(const char* format_str, Args&&... args) { + code += fmt::format(fmt::runtime(format_str), std::forward<Args>(args)...); + // TODO: Remove this + code += '\n'; + } + + std::string code; + RegAlloc reg_alloc{}; + const Info& info; + const Profile& profile; + const RuntimeInfo& runtime_info; + + std::vector<u32> texture_buffer_bindings; + std::vector<u32> image_buffer_bindings; + std::vector<u32> texture_bindings; + std::vector<u32> image_bindings; + + Stage stage{}; + std::string_view stage_name = "invalid"; + std::string_view attrib_name = "invalid"; + + u32 num_safety_loop_vars{}; + bool uses_y_direction{}; +}; + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp new file mode 100644 index 000000000..a5e8c9b6e --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -0,0 +1,492 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <string> +#include <tuple> + +#include "common/div_ceil.h" +#include "common/settings.h" +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" + +namespace Shader::Backend::GLASM { +namespace { +template <class Func> +struct FuncTraits {}; + +template <class ReturnType_, class... 
Args> +struct FuncTraits<ReturnType_ (*)(Args...)> { + using ReturnType = ReturnType_; + + static constexpr size_t NUM_ARGS = sizeof...(Args); + + template <size_t I> + using ArgType = std::tuple_element_t<I, std::tuple<Args...>>; +}; + +template <typename T> +struct Identity { + Identity(T data_) : data{data_} {} + + T Extract() { + return data; + } + + T data; +}; + +template <bool scalar> +class RegWrapper { +public: + RegWrapper(EmitContext& ctx, const IR::Value& ir_value) : reg_alloc{ctx.reg_alloc} { + const Value value{reg_alloc.Peek(ir_value)}; + if (value.type == Type::Register) { + inst = ir_value.InstRecursive(); + reg = Register{value}; + } else { + reg = value.type == Type::U64 ? reg_alloc.AllocLongReg() : reg_alloc.AllocReg(); + } + switch (value.type) { + case Type::Register: + case Type::Void: + break; + case Type::U32: + ctx.Add("MOV.U {}.x,{};", reg, value.imm_u32); + break; + case Type::U64: + ctx.Add("MOV.U64 {}.x,{};", reg, value.imm_u64); + break; + } + } + + auto Extract() { + if (inst) { + reg_alloc.Unref(*inst); + } else { + reg_alloc.FreeReg(reg); + } + return std::conditional_t<scalar, ScalarRegister, Register>{Value{reg}}; + } + +private: + RegAlloc& reg_alloc; + IR::Inst* inst{}; + Register reg{}; +}; + +template <typename ArgType> +class ValueWrapper { +public: + ValueWrapper(EmitContext& ctx, const IR::Value& ir_value_) + : reg_alloc{ctx.reg_alloc}, ir_value{ir_value_}, value{reg_alloc.Peek(ir_value)} {} + + ArgType Extract() { + if (!ir_value.IsImmediate()) { + reg_alloc.Unref(*ir_value.InstRecursive()); + } + return value; + } + +private: + RegAlloc& reg_alloc; + const IR::Value& ir_value; + ArgType value; +}; + +template <typename ArgType> +auto Arg(EmitContext& ctx, const IR::Value& arg) { + if constexpr (std::is_same_v<ArgType, Register>) { + return RegWrapper<false>{ctx, arg}; + } else if constexpr (std::is_same_v<ArgType, ScalarRegister>) { + return RegWrapper<true>{ctx, arg}; + } else if constexpr (std::is_base_of_v<Value, ArgType>) { + return ValueWrapper<ArgType>{ctx, arg}; + } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) { + return Identity<const IR::Value&>{arg}; + } else if constexpr (std::is_same_v<ArgType, u32>) { + return Identity{arg.U32()}; + } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) { + return Identity{arg.Attribute()}; + } else if constexpr (std::is_same_v<ArgType, IR::Patch>) { + return Identity{arg.Patch()}; + } else if constexpr (std::is_same_v<ArgType, IR::Reg>) { + return Identity{arg.Reg()}; + } +} + +template <auto func, bool is_first_arg_inst> +struct InvokeCall { + template <typename... Args> + InvokeCall(EmitContext& ctx, IR::Inst* inst, Args&&... args) { + if constexpr (is_first_arg_inst) { + func(ctx, *inst, args.Extract()...); + } else { + func(ctx, args.Extract()...); + } + } +}; + +template <auto func, bool is_first_arg_inst, size_t... 
I> +void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) { + using Traits = FuncTraits<decltype(func)>; + if constexpr (is_first_arg_inst) { + InvokeCall<func, is_first_arg_inst>{ + ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...}; + } else { + InvokeCall<func, is_first_arg_inst>{ + ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...}; + } +} + +template <auto func> +void Invoke(EmitContext& ctx, IR::Inst* inst) { + using Traits = FuncTraits<decltype(func)>; + static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments"); + if constexpr (Traits::NUM_ARGS == 1) { + Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{}); + } else { + using FirstArgType = typename Traits::template ArgType<1>; + static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst&>; + using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>; + Invoke<func, is_first_arg_inst>(ctx, inst, Indices{}); + } +} + +void EmitInst(EmitContext& ctx, IR::Inst* inst) { + switch (inst->GetOpcode()) { +#define OPCODE(name, result_type, ...) \ + case IR::Opcode::name: \ + return Invoke<&Emit##name>(ctx, inst); +#include "shader_recompiler/frontend/ir/opcodes.inc" +#undef OPCODE + } + throw LogicError("Invalid opcode {}", inst->GetOpcode()); +} + +bool IsReference(IR::Inst& inst) { + return inst.GetOpcode() == IR::Opcode::Reference; +} + +void PrecolorInst(IR::Inst& phi) { + // Insert phi moves before references to avoid overwritting other phis + const size_t num_args{phi.NumArgs()}; + for (size_t i = 0; i < num_args; ++i) { + IR::Block& phi_block{*phi.PhiBlock(i)}; + auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()}; + IR::IREmitter ir{phi_block, it}; + const IR::Value arg{phi.Arg(i)}; + if (arg.IsImmediate()) { + ir.PhiMove(phi, arg); + } else { + ir.PhiMove(phi, IR::Value{&RegAlloc::AliasInst(*arg.Inst())}); + } + } + for (size_t i = 0; i < num_args; ++i) { + IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi}); + } +} + +void Precolor(const IR::Program& program) { + for (IR::Block* const block : program.blocks) { + for (IR::Inst& phi : block->Instructions()) { + if (!IR::IsPhi(phi)) { + break; + } + PrecolorInst(phi); + } + } +} + +void EmitCode(EmitContext& ctx, const IR::Program& program) { + const auto eval{ + [&](const IR::U1& cond) { return ScalarS32{ctx.reg_alloc.Consume(IR::Value{cond})}; }}; + for (const IR::AbstractSyntaxNode& node : program.syntax_list) { + switch (node.type) { + case IR::AbstractSyntaxNode::Type::Block: + for (IR::Inst& inst : node.data.block->Instructions()) { + EmitInst(ctx, &inst); + } + break; + case IR::AbstractSyntaxNode::Type::If: + ctx.Add("MOV.S.CC RC,{};" + "IF NE.x;", + eval(node.data.if_node.cond)); + break; + case IR::AbstractSyntaxNode::Type::EndIf: + ctx.Add("ENDIF;"); + break; + case IR::AbstractSyntaxNode::Type::Loop: + ctx.Add("REP;"); + break; + case IR::AbstractSyntaxNode::Type::Repeat: + if (!Settings::values.disable_shader_loop_safety_checks) { + const u32 loop_index{ctx.num_safety_loop_vars++}; + const u32 vector_index{loop_index / 4}; + const char component{"xyzw"[loop_index % 4]}; + ctx.Add("SUB.S.CC loop{}.{},loop{}.{},1;" + "BRK(LT.{});", + vector_index, component, vector_index, component, component); + } + if (node.data.repeat.cond.IsImmediate()) { + if (node.data.repeat.cond.U1()) { + ctx.Add("ENDREP;"); + } else { + ctx.Add("BRK;" + "ENDREP;"); + } + } else { + ctx.Add("MOV.S.CC RC,{};" + 
"BRK(EQ.x);" + "ENDREP;", + eval(node.data.repeat.cond)); + } + break; + case IR::AbstractSyntaxNode::Type::Break: + if (node.data.break_node.cond.IsImmediate()) { + if (node.data.break_node.cond.U1()) { + ctx.Add("BRK;"); + } + } else { + ctx.Add("MOV.S.CC RC,{};" + "BRK (NE.x);", + eval(node.data.break_node.cond)); + } + break; + case IR::AbstractSyntaxNode::Type::Return: + case IR::AbstractSyntaxNode::Type::Unreachable: + ctx.Add("RET;"); + break; + } + } + if (!ctx.reg_alloc.IsEmpty()) { + LOG_WARNING(Shader_GLASM, "Register leak after generating code"); + } +} + +void SetupOptions(const IR::Program& program, const Profile& profile, + const RuntimeInfo& runtime_info, std::string& header) { + const Info& info{program.info}; + const Stage stage{program.stage}; + + // TODO: Track the shared atomic ops + header += "OPTION NV_internal;" + "OPTION NV_shader_storage_buffer;" + "OPTION NV_gpu_program_fp64;"; + if (info.uses_int64_bit_atomics) { + header += "OPTION NV_shader_atomic_int64;"; + } + if (info.uses_atomic_f32_add) { + header += "OPTION NV_shader_atomic_float;"; + } + if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { + header += "OPTION NV_shader_atomic_fp16_vector;"; + } + if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote || + info.uses_fswzadd) { + header += "OPTION NV_shader_thread_group;"; + } + if (info.uses_subgroup_shuffles) { + header += "OPTION NV_shader_thread_shuffle;"; + } + if (info.uses_sparse_residency) { + header += "OPTION EXT_sparse_texture2;"; + } + const bool stores_viewport_layer{info.stores[IR::Attribute::ViewportIndex] || + info.stores[IR::Attribute::Layer]}; + if ((stage != Stage::Geometry && stores_viewport_layer) || + info.stores[IR::Attribute::ViewportMask]) { + if (profile.support_viewport_index_layer_non_geometry) { + header += "OPTION NV_viewport_array2;"; + } + } + if (program.is_geometry_passthrough && profile.support_geometry_shader_passthrough) { + header += "OPTION NV_geometry_shader_passthrough;"; + } + if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) { + header += "OPTION EXT_shader_image_load_formatted;"; + } + if (profile.support_derivative_control) { + header += "OPTION ARB_derivative_control;"; + } + if (stage == Stage::Fragment && runtime_info.force_early_z != 0) { + header += "OPTION NV_early_fragment_tests;"; + } + if (stage == Stage::Fragment) { + header += "OPTION ARB_draw_buffers;"; + } +} + +std::string_view StageHeader(Stage stage) { + switch (stage) { + case Stage::VertexA: + case Stage::VertexB: + return "!!NVvp5.0\n"; + case Stage::TessellationControl: + return "!!NVtcp5.0\n"; + case Stage::TessellationEval: + return "!!NVtep5.0\n"; + case Stage::Geometry: + return "!!NVgp5.0\n"; + case Stage::Fragment: + return "!!NVfp5.0\n"; + case Stage::Compute: + return "!!NVcp5.0\n"; + } + throw InvalidArgument("Invalid stage {}", stage); +} + +std::string_view InputPrimitive(InputTopology topology) { + switch (topology) { + case InputTopology::Points: + return "POINTS"; + case InputTopology::Lines: + return "LINES"; + case InputTopology::LinesAdjacency: + return "LINESS_ADJACENCY"; + case InputTopology::Triangles: + return "TRIANGLES"; + case InputTopology::TrianglesAdjacency: + return "TRIANGLES_ADJACENCY"; + } + throw InvalidArgument("Invalid input topology {}", topology); +} + +std::string_view OutputPrimitive(OutputTopology topology) { + switch (topology) { + case OutputTopology::PointList: + return "POINTS"; + case 
OutputTopology::LineStrip: + return "LINE_STRIP"; + case OutputTopology::TriangleStrip: + return "TRIANGLE_STRIP"; + } + throw InvalidArgument("Invalid output topology {}", topology); +} + +std::string_view GetTessMode(TessPrimitive primitive) { + switch (primitive) { + case TessPrimitive::Triangles: + return "TRIANGLES"; + case TessPrimitive::Quads: + return "QUADS"; + case TessPrimitive::Isolines: + return "ISOLINES"; + } + throw InvalidArgument("Invalid tessellation primitive {}", primitive); +} + +std::string_view GetTessSpacing(TessSpacing spacing) { + switch (spacing) { + case TessSpacing::Equal: + return "EQUAL"; + case TessSpacing::FractionalOdd: + return "FRACTIONAL_ODD"; + case TessSpacing::FractionalEven: + return "FRACTIONAL_EVEN"; + } + throw InvalidArgument("Invalid tessellation spacing {}", spacing); +} +} // Anonymous namespace + +std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program, + Bindings& bindings) { + EmitContext ctx{program, bindings, profile, runtime_info}; + Precolor(program); + EmitCode(ctx, program); + std::string header{StageHeader(program.stage)}; + SetupOptions(program, profile, runtime_info, header); + switch (program.stage) { + case Stage::TessellationControl: + header += fmt::format("VERTICES_OUT {};", program.invocations); + break; + case Stage::TessellationEval: + header += fmt::format("TESS_MODE {};" + "TESS_SPACING {};" + "TESS_VERTEX_ORDER {};", + GetTessMode(runtime_info.tess_primitive), + GetTessSpacing(runtime_info.tess_spacing), + runtime_info.tess_clockwise ? "CW" : "CCW"); + break; + case Stage::Geometry: + header += fmt::format("PRIMITIVE_IN {};", InputPrimitive(runtime_info.input_topology)); + if (program.is_geometry_passthrough) { + if (profile.support_geometry_shader_passthrough) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (program.info.passthrough.Generic(index)) { + header += fmt::format("PASSTHROUGH result.attrib[{}];", index); + } + } + if (program.info.passthrough.AnyComponent(IR::Attribute::PositionX)) { + header += "PASSTHROUGH result.position;"; + } + } else { + LOG_WARNING(Shader_GLASM, "Passthrough geometry program used but not supported"); + } + } else { + header += + fmt::format("VERTICES_OUT {};" + "PRIMITIVE_OUT {};", + program.output_vertices, OutputPrimitive(program.output_topology)); + } + break; + case Stage::Compute: + header += fmt::format("GROUP_SIZE {} {} {};", program.workgroup_size[0], + program.workgroup_size[1], program.workgroup_size[2]); + break; + default: + break; + } + if (program.shared_memory_size > 0) { + header += fmt::format("SHARED_MEMORY {};", program.shared_memory_size); + header += fmt::format("SHARED shared_mem[]={{program.sharedmem}};"); + } + header += "TEMP "; + for (size_t index = 0; index < ctx.reg_alloc.NumUsedRegisters(); ++index) { + header += fmt::format("R{},", index); + } + if (program.local_memory_size > 0) { + header += fmt::format("lmem[{}],", program.local_memory_size); + } + if (program.info.uses_fswzadd) { + header += "FSWZA[4],FSWZB[4],"; + } + const u32 num_safety_loop_vectors{Common::DivCeil(ctx.num_safety_loop_vars, 4u)}; + for (u32 index = 0; index < num_safety_loop_vectors; ++index) { + header += fmt::format("loop{},", index); + } + header += "RC;" + "LONG TEMP "; + for (size_t index = 0; index < ctx.reg_alloc.NumUsedLongRegisters(); ++index) { + header += fmt::format("D{},", index); + } + header += "DC;"; + if (program.info.uses_fswzadd) { + header += "MOV.F FSWZA[0],-1;" + "MOV.F FSWZA[1],1;" + "MOV.F 
FSWZA[2],-1;" + "MOV.F FSWZA[3],0;" + "MOV.F FSWZB[0],-1;" + "MOV.F FSWZB[1],-1;" + "MOV.F FSWZB[2],1;" + "MOV.F FSWZB[3],-1;"; + } + for (u32 index = 0; index < num_safety_loop_vectors; ++index) { + header += fmt::format("MOV.S loop{},{{0x2000,0x2000,0x2000,0x2000}};", index); + } + if (ctx.uses_y_direction) { + header += "PARAM y_direction[1]={state.material.front.ambient};"; + } + ctx.code.insert(0, header); + ctx.code += "END"; + return ctx.code; +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.h b/src/shader_recompiler/backend/glasm/emit_glasm.h new file mode 100644 index 000000000..bcb55f062 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm.h @@ -0,0 +1,25 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <string> + +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" + +namespace Shader::Backend::GLASM { + +[[nodiscard]] std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program, Bindings& bindings); + +[[nodiscard]] inline std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program) { + Bindings binding; + return EmitGLASM(profile, runtime_info, program, binding); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp new file mode 100644 index 000000000..9201ccd39 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp @@ -0,0 +1,91 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { + +static void Alias(IR::Inst& inst, const IR::Value& value) { + if (value.IsImmediate()) { + return; + } + IR::Inst& value_inst{RegAlloc::AliasInst(*value.Inst())}; + value_inst.DestructiveAddUsage(inst.UseCount()); + value_inst.DestructiveRemoveUsage(); + inst.SetDefinition(value_inst.Definition<Id>()); +} + +void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + +void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { + // Fake one usage to get a real register out of the condition + inst.DestructiveAddUsage(1); + const Register ret{ctx.reg_alloc.Define(inst)}; + const ScalarS32 input{ctx.reg_alloc.Consume(value)}; + if (ret != input) { + ctx.Add("MOV.S {},{};", ret, input); + } +} + +void EmitBitCastU16F16(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + +void EmitBitCastU32F32(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + +void EmitBitCastU64F64(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + +void EmitBitCastF16U16(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + +void EmitBitCastF32U32(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + +void EmitBitCastF64U64(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + +void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value) { + ctx.LongAdd("PK64.U {}.x,{};", inst, value); +} + +void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value) { + ctx.Add("UP64.U {}.xy,{}.x;", inst, value); +} + +void EmitPackFloat2x16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitUnpackFloat2x16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value) { + ctx.Add("PK2H {}.x,{};", inst, value); +} + +void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value) { + ctx.Add("UP2H {}.xy,{}.x;", inst, value); +} + +void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value) { + ctx.LongAdd("PK64 {}.x,{};", inst, value); +} + +void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value) { + ctx.Add("UP64 {}.xy,{}.x;", inst, value); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp new file mode 100644 index 000000000..bff0b7c1c --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp @@ -0,0 +1,244 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { +namespace { +template <auto read_imm, char type, typename... Values> +void CompositeConstruct(EmitContext& ctx, IR::Inst& inst, Values&&... 
elements) { + const Register ret{ctx.reg_alloc.Define(inst)}; + if (std::ranges::any_of(std::array{elements...}, + [](const IR::Value& value) { return value.IsImmediate(); })) { + using Type = std::invoke_result_t<decltype(read_imm), IR::Value>; + const std::array<Type, 4> values{(elements.IsImmediate() ? (elements.*read_imm)() : 0)...}; + ctx.Add("MOV.{} {},{{{},{},{},{}}};", type, ret, fmt::to_string(values[0]), + fmt::to_string(values[1]), fmt::to_string(values[2]), fmt::to_string(values[3])); + } + size_t index{}; + for (const IR::Value& element : {elements...}) { + if (!element.IsImmediate()) { + const ScalarU32 value{ctx.reg_alloc.Consume(element)}; + ctx.Add("MOV.{} {}.{},{};", type, ret, "xyzw"[index], value); + } + ++index; + } +} + +void CompositeExtract(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index, char type) { + const Register ret{ctx.reg_alloc.Define(inst)}; + if (ret == composite && index == 0) { + // No need to do anything here, the source and destination are the same register + return; + } + ctx.Add("MOV.{} {}.x,{}.{};", type, ret, composite, "xyzw"[index]); +} + +template <typename ObjectType> +void CompositeInsert(EmitContext& ctx, IR::Inst& inst, Register composite, ObjectType object, + u32 index, char type) { + const Register ret{ctx.reg_alloc.Define(inst)}; + const char swizzle{"xyzw"[index]}; + if (ret != composite && ret == object) { + // The object is aliased with the return value, so we have to use a temporary to insert + ctx.Add("MOV.{} RC,{};" + "MOV.{} RC.{},{};" + "MOV.{} {},RC;", + type, composite, type, swizzle, object, type, ret); + } else if (ret != composite) { + // The input composite is not aliased with the return value so we have to copy it before + // hand. But the insert object is not aliased with the return value, so we don't have to + // worry about that + ctx.Add("MOV.{} {},{};" + "MOV.{} {}.{},{};", + type, ret, composite, type, ret, swizzle, object); + } else { + // The return value is alised so we can just insert the object, it doesn't matter if it's + // aliased + ctx.Add("MOV.{} {}.{},{};", type, ret, swizzle, object); + } +} +} // Anonymous namespace + +void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2) { + CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2); +} + +void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2, const IR::Value& e3) { + CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2, e3); +} + +void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2, const IR::Value& e3, const IR::Value& e4) { + CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2, e3, e4); +} + +void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { + CompositeExtract(ctx, inst, composite, index, 'U'); +} + +void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { + CompositeExtract(ctx, inst, composite, index, 'U'); +} + +void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { + CompositeExtract(ctx, inst, composite, index, 'U'); +} + +void EmitCompositeInsertU32x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, + [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertU32x3([[maybe_unused]] EmitContext& ctx, + 
[[maybe_unused]] Register composite, + [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertU32x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, + [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeConstructF16x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1, + [[maybe_unused]] Register e2) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeConstructF16x3([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1, + [[maybe_unused]] Register e2, [[maybe_unused]] Register e3) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeConstructF16x4([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1, + [[maybe_unused]] Register e2, [[maybe_unused]] Register e3, + [[maybe_unused]] Register e4) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeExtractF16x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeExtractF16x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeExtractF16x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertF16x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] Register object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertF16x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] Register object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertF16x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] Register object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2) { + CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2); +} + +void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2, const IR::Value& e3) { + CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2, e3); +} + +void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2, const IR::Value& e3, const IR::Value& e4) { + CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2, e3, e4); +} + +void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { + CompositeExtract(ctx, inst, composite, index, 'F'); +} + +void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { + CompositeExtract(ctx, inst, composite, index, 'F'); +} + +void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { + CompositeExtract(ctx, inst, composite, index, 'F'); +} + +void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, + ScalarF32 object, u32 index) { + 
CompositeInsert(ctx, inst, composite, object, index, 'F'); +} + +void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, + ScalarF32 object, u32 index) { + CompositeInsert(ctx, inst, composite, object, index, 'F'); +} + +void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, + ScalarF32 object, u32 index) { + CompositeInsert(ctx, inst, composite, object, index, 'F'); +} + +void EmitCompositeConstructF64x2([[maybe_unused]] EmitContext& ctx) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeConstructF64x3([[maybe_unused]] EmitContext& ctx) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeConstructF64x4([[maybe_unused]] EmitContext& ctx) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeExtractF64x2([[maybe_unused]] EmitContext& ctx) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeExtractF64x3([[maybe_unused]] EmitContext& ctx) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeExtractF64x4([[maybe_unused]] EmitContext& ctx) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertF64x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] Register object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertF64x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] Register object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertF64x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] Register object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp new file mode 100644 index 000000000..02c9dc6d7 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -0,0 +1,346 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" +#include "shader_recompiler/shader_info.h" + +namespace Shader::Backend::GLASM { +namespace { +void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, + std::string_view size) { + if (!binding.IsImmediate()) { + throw NotImplementedException("Indirect constant buffer loading"); + } + const Register ret{ctx.reg_alloc.Define(inst)}; + if (offset.type == Type::U32) { + // Avoid reading arrays out of bounds, matching hardware's behavior + if (offset.imm_u32 >= 0x10'000) { + ctx.Add("MOV.S {},0;", ret); + return; + } + } + ctx.Add("LDC.{} {},c{}[{}];", size, ret, binding.U32(), offset); +} + +bool IsInputArray(Stage stage) { + return stage == Stage::Geometry || stage == Stage::TessellationControl || + stage == Stage::TessellationEval; +} + +std::string VertexIndex(EmitContext& ctx, ScalarU32 vertex) { + return IsInputArray(ctx.stage) ? 
fmt::format("[{}]", vertex) : ""; +} + +u32 TexCoordIndex(IR::Attribute attr) { + return (static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4; +} +} // Anonymous namespace + +void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { + GetCbuf(ctx, inst, binding, offset, "U8"); +} + +void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { + GetCbuf(ctx, inst, binding, offset, "S8"); +} + +void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { + GetCbuf(ctx, inst, binding, offset, "U16"); +} + +void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { + GetCbuf(ctx, inst, binding, offset, "S16"); +} + +void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { + GetCbuf(ctx, inst, binding, offset, "U32"); +} + +void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { + GetCbuf(ctx, inst, binding, offset, "F32"); +} + +void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset) { + GetCbuf(ctx, inst, binding, offset, "U32X2"); +} + +void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex) { + const u32 element{static_cast<u32>(attr) % 4}; + const char swizzle{"xyzw"[element]}; + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + ctx.Add("MOV.F {}.x,in_attr{}{}[0].{};", inst, index, VertexIndex(ctx, vertex), swizzle); + return; + } + if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) { + const u32 index{TexCoordIndex(attr)}; + ctx.Add("MOV.F {}.x,{}.texcoord[{}].{};", inst, ctx.attrib_name, index, swizzle); + return; + } + switch (attr) { + case IR::Attribute::PrimitiveId: + ctx.Add("MOV.S {}.x,primitive.id;", inst); + break; + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + if (IsInputArray(ctx.stage)) { + ctx.Add("MOV.F {}.x,vertex_position{}.{};", inst, VertexIndex(ctx, vertex), swizzle); + } else { + ctx.Add("MOV.F {}.x,{}.position.{};", inst, ctx.attrib_name, swizzle); + } + break; + case IR::Attribute::ColorFrontDiffuseR: + case IR::Attribute::ColorFrontDiffuseG: + case IR::Attribute::ColorFrontDiffuseB: + case IR::Attribute::ColorFrontDiffuseA: + ctx.Add("MOV.F {}.x,{}.color.{};", inst, ctx.attrib_name, swizzle); + break; + case IR::Attribute::PointSpriteS: + case IR::Attribute::PointSpriteT: + ctx.Add("MOV.F {}.x,{}.pointcoord.{};", inst, ctx.attrib_name, swizzle); + break; + case IR::Attribute::TessellationEvaluationPointU: + case IR::Attribute::TessellationEvaluationPointV: + ctx.Add("MOV.F {}.x,vertex.tesscoord.{};", inst, swizzle); + break; + case IR::Attribute::InstanceId: + ctx.Add("MOV.S {}.x,{}.instance;", inst, ctx.attrib_name); + break; + case IR::Attribute::VertexId: + ctx.Add("MOV.S {}.x,{}.id;", inst, ctx.attrib_name); + break; + case IR::Attribute::FrontFace: + ctx.Add("CMP.S {}.x,{}.facing.x,0,-1;", inst, ctx.attrib_name); + break; + default: + throw NotImplementedException("Get attribute {}", attr); + } +} + +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, + [[maybe_unused]] ScalarU32 vertex) { + const u32 element{static_cast<u32>(attr) % 4}; + const char swizzle{"xyzw"[element]}; + if (IR::IsGeneric(attr)) { + const u32 
index{IR::GenericAttributeIndex(attr)}; + ctx.Add("MOV.F out_attr{}[0].{},{};", index, swizzle, value); + return; + } + if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9R) { + const u32 index{TexCoordIndex(attr)}; + ctx.Add("MOV.F result.texcoord[{}].{},{};", index, swizzle, value); + return; + } + switch (attr) { + case IR::Attribute::Layer: + if (ctx.stage == Stage::Geometry || ctx.profile.support_viewport_index_layer_non_geometry) { + ctx.Add("MOV.F result.layer.x,{};", value); + } else { + LOG_WARNING(Shader_GLASM, + "Layer stored outside of geometry shader not supported by device"); + } + break; + case IR::Attribute::ViewportIndex: + if (ctx.stage == Stage::Geometry || ctx.profile.support_viewport_index_layer_non_geometry) { + ctx.Add("MOV.F result.viewport.x,{};", value); + } else { + LOG_WARNING(Shader_GLASM, + "Viewport stored outside of geometry shader not supported by device"); + } + break; + case IR::Attribute::ViewportMask: + // NV_viewport_array2 is required to access result.viewportmask, regardless of shader stage. + if (ctx.profile.support_viewport_index_layer_non_geometry) { + ctx.Add("MOV.F result.viewportmask[0].x,{};", value); + } else { + LOG_WARNING(Shader_GLASM, "Device does not support storing to ViewportMask"); + } + break; + case IR::Attribute::PointSize: + ctx.Add("MOV.F result.pointsize.x,{};", value); + break; + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + ctx.Add("MOV.F result.position.{},{};", swizzle, value); + break; + case IR::Attribute::ColorFrontDiffuseR: + case IR::Attribute::ColorFrontDiffuseG: + case IR::Attribute::ColorFrontDiffuseB: + case IR::Attribute::ColorFrontDiffuseA: + ctx.Add("MOV.F result.color.{},{};", swizzle, value); + break; + case IR::Attribute::ColorFrontSpecularR: + case IR::Attribute::ColorFrontSpecularG: + case IR::Attribute::ColorFrontSpecularB: + case IR::Attribute::ColorFrontSpecularA: + ctx.Add("MOV.F result.color.secondary.{},{};", swizzle, value); + break; + case IR::Attribute::ColorBackDiffuseR: + case IR::Attribute::ColorBackDiffuseG: + case IR::Attribute::ColorBackDiffuseB: + case IR::Attribute::ColorBackDiffuseA: + ctx.Add("MOV.F result.color.back.{},{};", swizzle, value); + break; + case IR::Attribute::ColorBackSpecularR: + case IR::Attribute::ColorBackSpecularG: + case IR::Attribute::ColorBackSpecularB: + case IR::Attribute::ColorBackSpecularA: + ctx.Add("MOV.F result.color.back.secondary.{},{};", swizzle, value); + break; + case IR::Attribute::FogCoordinate: + ctx.Add("MOV.F result.fogcoord.x,{};", value); + break; + case IR::Attribute::ClipDistance0: + case IR::Attribute::ClipDistance1: + case IR::Attribute::ClipDistance2: + case IR::Attribute::ClipDistance3: + case IR::Attribute::ClipDistance4: + case IR::Attribute::ClipDistance5: + case IR::Attribute::ClipDistance6: + case IR::Attribute::ClipDistance7: { + const u32 index{static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::ClipDistance0)}; + ctx.Add("MOV.F result.clip[{}].x,{};", index, value); + break; + } + default: + throw NotImplementedException("Set attribute {}", attr); + } +} + +void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex) { + // RC.x = base_index + // RC.y = masked_index + // RC.z = compare_index + ctx.Add("SHR.S RC.x,{},2;" + "AND.S RC.y,RC.x,3;" + "SHR.S RC.z,{},4;", + offset, offset); + + const Register ret{ctx.reg_alloc.Define(inst)}; + u32 num_endifs{}; + const auto 
read{[&](u32 compare_index, const std::array<std::string, 4>& values) { + ++num_endifs; + ctx.Add("SEQ.S.CC RC.w,RC.z,{};" // compare_index + "IF NE.w;" + // X + "SEQ.S.CC RC.w,RC.y,0;" + "IF NE.w;" + "MOV {}.x,{};" + "ELSE;" + // Y + "SEQ.S.CC RC.w,RC.y,1;" + "IF NE.w;" + "MOV {}.x,{};" + "ELSE;" + // Z + "SEQ.S.CC RC.w,RC.y,2;" + "IF NE.w;" + "MOV {}.x,{};" + "ELSE;" + // W + "MOV {}.x,{};" + "ENDIF;" + "ENDIF;" + "ENDIF;" + "ELSE;", + compare_index, ret, values[0], ret, values[1], ret, values[2], ret, values[3]); + }}; + const auto read_swizzled{[&](u32 compare_index, std::string_view value) { + const std::array values{fmt::format("{}.x", value), fmt::format("{}.y", value), + fmt::format("{}.z", value), fmt::format("{}.w", value)}; + read(compare_index, values); + }}; + if (ctx.info.loads.AnyComponent(IR::Attribute::PositionX)) { + const u32 index{static_cast<u32>(IR::Attribute::PositionX)}; + if (IsInputArray(ctx.stage)) { + read_swizzled(index, fmt::format("vertex_position{}", VertexIndex(ctx, vertex))); + } else { + read_swizzled(index, fmt::format("{}.position", ctx.attrib_name)); + } + } + for (u32 index = 0; index < static_cast<u32>(IR::NUM_GENERICS); ++index) { + if (!ctx.info.loads.Generic(index)) { + continue; + } + read_swizzled(index, fmt::format("in_attr{}{}[0]", index, VertexIndex(ctx, vertex))); + } + for (u32 i = 0; i < num_endifs; ++i) { + ctx.Add("ENDIF;"); + } +} + +void EmitSetAttributeIndexed([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset, + [[maybe_unused]] ScalarF32 value, [[maybe_unused]] ScalarU32 vertex) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch) { + if (!IR::IsGeneric(patch)) { + throw NotImplementedException("Non-generic patch load"); + } + const u32 index{IR::GenericPatchIndex(patch)}; + const u32 element{IR::GenericPatchElement(patch)}; + const char swizzle{"xyzw"[element]}; + const std::string_view out{ctx.stage == Stage::TessellationControl ? 
".out" : ""}; + ctx.Add("MOV.F {},primitive{}.patch.attrib[{}].{};", inst, out, index, swizzle); +} + +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, ScalarF32 value) { + if (IR::IsGeneric(patch)) { + const u32 index{IR::GenericPatchIndex(patch)}; + const u32 element{IR::GenericPatchElement(patch)}; + ctx.Add("MOV.F result.patch.attrib[{}].{},{};", index, "xyzw"[element], value); + return; + } + switch (patch) { + case IR::Patch::TessellationLodLeft: + case IR::Patch::TessellationLodRight: + case IR::Patch::TessellationLodTop: + case IR::Patch::TessellationLodBottom: { + const u32 index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)}; + ctx.Add("MOV.F result.patch.tessouter[{}].x,{};", index, value); + break; + } + case IR::Patch::TessellationLodInteriorU: + ctx.Add("MOV.F result.patch.tessinner[0].x,{};", value); + break; + case IR::Patch::TessellationLodInteriorV: + ctx.Add("MOV.F result.patch.tessinner[1].x,{};", value); + break; + default: + throw NotImplementedException("Patch {}", patch); + } +} + +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, ScalarF32 value) { + ctx.Add("MOV.F frag_color{}.{},{};", index, "xyzw"[component], value); +} + +void EmitSetSampleMask(EmitContext& ctx, ScalarS32 value) { + ctx.Add("MOV.S result.samplemask.x,{};", value); +} + +void EmitSetFragDepth(EmitContext& ctx, ScalarF32 value) { + ctx.Add("MOV.F result.depth.z,{};", value); +} + +void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset) { + ctx.Add("MOV.U {},lmem[{}].x;", inst, word_offset); +} + +void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value) { + ctx.Add("MOV.U lmem[{}].x,{};", word_offset, value); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp new file mode 100644 index 000000000..ccdf1cbc8 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp @@ -0,0 +1,231 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { +namespace { +std::string_view FpRounding(IR::FpRounding fp_rounding) { + switch (fp_rounding) { + case IR::FpRounding::DontCare: + return ""; + case IR::FpRounding::RN: + return ".ROUND"; + case IR::FpRounding::RZ: + return ".TRUNC"; + case IR::FpRounding::RM: + return ".FLR"; + case IR::FpRounding::RP: + return ".CEIL"; + } + throw InvalidArgument("Invalid floating-point rounding {}", fp_rounding); +} + +template <typename InputType> +void Convert(EmitContext& ctx, IR::Inst& inst, InputType value, std::string_view dest, + std::string_view src, bool is_long_result) { + const std::string_view fp_rounding{FpRounding(inst.Flags<IR::FpControl>().rounding)}; + const auto ret{is_long_result ? 
ctx.reg_alloc.LongDefine(inst) : ctx.reg_alloc.Define(inst)}; + ctx.Add("CVT.{}.{}{} {}.x,{};", dest, src, fp_rounding, ret, value); +} +} // Anonymous namespace + +void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "S16", "F16", false); +} + +void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + Convert(ctx, inst, value, "S16", "F32", false); +} + +void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + Convert(ctx, inst, value, "S16", "F64", false); +} + +void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "S32", "F16", false); +} + +void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + Convert(ctx, inst, value, "S32", "F32", false); +} + +void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + Convert(ctx, inst, value, "S32", "F64", false); +} + +void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "S64", "F16", true); +} + +void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + Convert(ctx, inst, value, "S64", "F32", true); +} + +void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + Convert(ctx, inst, value, "S64", "F64", true); +} + +void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "U16", "F16", false); +} + +void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + Convert(ctx, inst, value, "U16", "F32", false); +} + +void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + Convert(ctx, inst, value, "U16", "F64", false); +} + +void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "U32", "F16", false); +} + +void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + Convert(ctx, inst, value, "U32", "F32", false); +} + +void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + Convert(ctx, inst, value, "U32", "F64", false); +} + +void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "U64", "F16", true); +} + +void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + Convert(ctx, inst, value, "U64", "F32", true); +} + +void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + Convert(ctx, inst, value, "U64", "F64", true); +} + +void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { + Convert(ctx, inst, value, "U64", "U32", true); +} + +void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "U32", "U64", false); +} + +void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + Convert(ctx, inst, value, "F16", "F32", false); +} + +void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F32", "F16", false); +} + +void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + Convert(ctx, inst, value, "F32", "F64", false); +} + +void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + Convert(ctx, inst, value, "F64", "F32", true); +} + +void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F16", "S8", false); +} + +void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, 
"F16", "S16", false); +} + +void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + Convert(ctx, inst, value, "F16", "S32", false); +} + +void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F16", "S64", false); +} + +void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F16", "U8", false); +} + +void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F16", "U16", false); +} + +void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { + Convert(ctx, inst, value, "F16", "U32", false); +} + +void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F16", "U64", false); +} + +void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F32", "S8", false); +} + +void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F32", "S16", false); +} + +void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + Convert(ctx, inst, value, "F32", "S32", false); +} + +void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F32", "S64", false); +} + +void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F32", "U8", false); +} + +void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F32", "U16", false); +} + +void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { + Convert(ctx, inst, value, "F32", "U32", false); +} + +void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F32", "U64", false); +} + +void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F64", "S8", true); +} + +void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F64", "S16", true); +} + +void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + Convert(ctx, inst, value, "F64", "S32", true); +} + +void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F64", "S64", true); +} + +void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F64", "U8", true); +} + +void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F64", "U16", true); +} + +void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { + Convert(ctx, inst, value, "F64", "U32", true); +} + +void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F64", "U64", true); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp new file mode 100644 index 000000000..4ed58619d --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp @@ -0,0 +1,414 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <string_view> + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { +namespace { +template <typename InputType> +void Compare(EmitContext& ctx, IR::Inst& inst, InputType lhs, InputType rhs, std::string_view op, + std::string_view type, bool ordered, bool inequality = false) { + const Register ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("{}.{} RC.x,{},{};", op, type, lhs, rhs); + if (ordered && inequality) { + ctx.Add("SEQ.{} RC.y,{},{};" + "SEQ.{} RC.z,{},{};" + "AND.U RC.x,RC.x,RC.y;" + "AND.U RC.x,RC.x,RC.z;" + "SNE.S {}.x,RC.x,0;", + type, lhs, lhs, type, rhs, rhs, ret); + } else if (ordered) { + ctx.Add("SNE.S {}.x,RC.x,0;", ret); + } else { + ctx.Add("SNE.{} RC.y,{},{};" + "SNE.{} RC.z,{},{};" + "OR.U RC.x,RC.x,RC.y;" + "OR.U RC.x,RC.x,RC.z;" + "SNE.S {}.x,RC.x,0;", + type, lhs, lhs, type, rhs, rhs, ret); + } +} + +template <typename InputType> +void Clamp(EmitContext& ctx, Register ret, InputType value, InputType min_value, + InputType max_value, std::string_view type) { + // Call MAX first to properly clamp nan to min_value instead + ctx.Add("MAX.{} RC.x,{},{};" + "MIN.{} {}.x,RC.x,{};", + type, min_value, value, type, ret, max_value); +} + +std::string_view Precise(IR::Inst& inst) { + const bool precise{inst.Flags<IR::FpControl>().no_contraction}; + return precise ? ".PREC" : ""; +} +} // Anonymous namespace + +void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("MOV.F {}.x,|{}|;", inst, value); +} + +void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + ctx.LongAdd("MOV.F64 {}.x,|{}|;", inst, value); +} + +void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] Register a, [[maybe_unused]] Register b) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { + ctx.Add("ADD.F{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b); +} + +void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) { + ctx.Add("ADD.F64{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b); +} + +void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] Register a, [[maybe_unused]] Register b, + [[maybe_unused]] Register c) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b, ScalarF32 c) { + ctx.Add("MAD.F{} {}.x,{},{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b, c); +} + +void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b, ScalarF64 c) { + ctx.Add("MAD.F64{} {}.x,{},{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b, c); +} + +void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { + ctx.Add("MAX.F {}.x,{},{};", inst, a, b); +} + +void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) { + ctx.LongAdd("MAX.F64 {}.x,{},{};", inst, a, b); +} + +void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { + ctx.Add("MIN.F {}.x,{},{};", inst, a, b); 
+} + +void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) { + ctx.LongAdd("MIN.F64 {}.x,{},{};", inst, a, b); +} + +void EmitFPMul16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] Register a, [[maybe_unused]] Register b) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { + ctx.Add("MUL.F{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b); +} + +void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) { + ctx.Add("MUL.F64{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b); +} + +void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, ScalarRegister value) { + ctx.Add("MOV.F {}.x,-{};", inst, value); +} + +void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, Register value) { + ctx.LongAdd("MOV.F64 {}.x,-{};", inst, value); +} + +void EmitFPSin(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("SIN {}.x,{};", inst, value); +} + +void EmitFPCos(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("COS {}.x,{};", inst, value); +} + +void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("EX2 {}.x,{};", inst, value); +} + +void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("LG2 {}.x,{};", inst, value); +} + +void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("RCP {}.x,{};", inst, value); +} + +void EmitFPRecip64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("RSQ {}.x,{};", inst, value); +} + +void EmitFPRecipSqrt64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + const Register ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("RSQ RC.x,{};RCP {}.x,RC.x;", value, ret); +} + +void EmitFPSaturate16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("MOV.F.SAT {}.x,{};", inst, value); +} + +void EmitFPSaturate64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPClamp16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value, + [[maybe_unused]] Register min_value, [[maybe_unused]] Register max_value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value, ScalarF32 min_value, + ScalarF32 max_value) { + Clamp(ctx, ctx.reg_alloc.Define(inst), value, min_value, max_value, "F"); +} + +void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value, ScalarF64 min_value, + ScalarF64 max_value) { + Clamp(ctx, ctx.reg_alloc.LongDefine(inst), value, min_value, max_value, "F64"); +} + +void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + 
ctx.Add("ROUND.F {}.x,{};", inst, value); +} + +void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + ctx.LongAdd("ROUND.F64 {}.x,{};", inst, value); +} + +void EmitFPFloor16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("FLR.F {}.x,{};", inst, value); +} + +void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + ctx.LongAdd("FLR.F64 {}.x,{};", inst, value); +} + +void EmitFPCeil16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("CEIL.F {}.x,{};", inst, value); +} + +void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + ctx.LongAdd("CEIL.F64 {}.x,{};", inst, value); +} + +void EmitFPTrunc16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("TRUNC.F {}.x,{};", inst, value); +} + +void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + ctx.LongAdd("TRUNC.F64 {}.x,{};", inst, value); +} + +void EmitFPOrdEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SEQ", "F", true); +} + +void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SEQ", "F64", true); +} + +void EmitFPUnordEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SEQ", "F", false); +} + +void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SEQ", "F64", false); +} + +void EmitFPOrdNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SNE", "F", true, true); +} + +void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SNE", "F64", true, true); +} + +void EmitFPUnordNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SNE", "F", false, true); +} + +void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SNE", "F64", false, true); +} + +void EmitFPOrdLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPOrdLessThan32(EmitContext& ctx, 
IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SLT", "F", true); +} + +void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SLT", "F64", true); +} + +void EmitFPUnordLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SLT", "F", false); +} + +void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SLT", "F64", false); +} + +void EmitFPOrdGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SGT", "F", true); +} + +void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SGT", "F64", true); +} + +void EmitFPUnordGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SGT", "F", false); +} + +void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SGT", "F64", false); +} + +void EmitFPOrdLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SLE", "F", true); +} + +void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SLE", "F64", true); +} + +void EmitFPUnordLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SLE", "F", false); +} + +void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SLE", "F64", false); +} + +void EmitFPOrdGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SGE", "F", true); +} + +void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SGE", "F64", true); +} + +void EmitFPUnordGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, 
inst, lhs, rhs, "SGE", "F", false); +} + +void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SGE", "F64", false); +} + +void EmitFPIsNan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + Compare(ctx, inst, value, value, "SNE", "F", true, false); +} + +void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + Compare(ctx, inst, value, value, "SNE", "F64", true, false); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp new file mode 100644 index 000000000..09e3a9b82 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -0,0 +1,850 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <utility> + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { +namespace { +struct ScopedRegister { + ScopedRegister() = default; + ScopedRegister(RegAlloc& reg_alloc_) : reg_alloc{®_alloc_}, reg{reg_alloc->AllocReg()} {} + + ~ScopedRegister() { + if (reg_alloc) { + reg_alloc->FreeReg(reg); + } + } + + ScopedRegister& operator=(ScopedRegister&& rhs) noexcept { + if (reg_alloc) { + reg_alloc->FreeReg(reg); + } + reg_alloc = std::exchange(rhs.reg_alloc, nullptr); + reg = rhs.reg; + return *this; + } + + ScopedRegister(ScopedRegister&& rhs) noexcept + : reg_alloc{std::exchange(rhs.reg_alloc, nullptr)}, reg{rhs.reg} {} + + ScopedRegister& operator=(const ScopedRegister&) = delete; + ScopedRegister(const ScopedRegister&) = delete; + + RegAlloc* reg_alloc{}; + Register reg; +}; + +std::string Texture(EmitContext& ctx, IR::TextureInstInfo info, + [[maybe_unused]] const IR::Value& index) { + // FIXME: indexed reads + if (info.type == TextureType::Buffer) { + return fmt::format("texture[{}]", ctx.texture_buffer_bindings.at(info.descriptor_index)); + } else { + return fmt::format("texture[{}]", ctx.texture_bindings.at(info.descriptor_index)); + } +} + +std::string Image(EmitContext& ctx, IR::TextureInstInfo info, + [[maybe_unused]] const IR::Value& index) { + // FIXME: indexed reads + if (info.type == TextureType::Buffer) { + return fmt::format("image[{}]", ctx.image_buffer_bindings.at(info.descriptor_index)); + } else { + return fmt::format("image[{}]", ctx.image_bindings.at(info.descriptor_index)); + } +} + +std::string_view TextureType(IR::TextureInstInfo info) { + if (info.is_depth) { + switch (info.type) { + case TextureType::Color1D: + return "SHADOW1D"; + case TextureType::ColorArray1D: + return "SHADOWARRAY1D"; + case TextureType::Color2D: + return "SHADOW2D"; + case TextureType::ColorArray2D: + return "SHADOWARRAY2D"; + case TextureType::Color3D: + return "SHADOW3D"; + case TextureType::ColorCube: + return "SHADOWCUBE"; + case TextureType::ColorArrayCube: + return "SHADOWARRAYCUBE"; + case TextureType::Buffer: + return "SHADOWBUFFER"; + } + } else { + switch (info.type) { + case TextureType::Color1D: + return "1D"; + case TextureType::ColorArray1D: + return "ARRAY1D"; + case TextureType::Color2D: + return "2D"; 
+ case TextureType::ColorArray2D: + return "ARRAY2D"; + case TextureType::Color3D: + return "3D"; + case TextureType::ColorCube: + return "CUBE"; + case TextureType::ColorArrayCube: + return "ARRAYCUBE"; + case TextureType::Buffer: + return "BUFFER"; + } + } + throw InvalidArgument("Invalid texture type {}", info.type.Value()); +} + +std::string Offset(EmitContext& ctx, const IR::Value& offset) { + if (offset.IsEmpty()) { + return ""; + } + return fmt::format(",offset({})", Register{ctx.reg_alloc.Consume(offset)}); +} + +std::pair<ScopedRegister, ScopedRegister> AllocOffsetsRegs(EmitContext& ctx, + const IR::Value& offset2) { + if (offset2.IsEmpty()) { + return {}; + } else { + return {ctx.reg_alloc, ctx.reg_alloc}; + } +} + +void SwizzleOffsets(EmitContext& ctx, Register off_x, Register off_y, const IR::Value& offset1, + const IR::Value& offset2) { + const Register offsets_a{ctx.reg_alloc.Consume(offset1)}; + const Register offsets_b{ctx.reg_alloc.Consume(offset2)}; + // Input swizzle: [XYXY] [XYXY] + // Output swizzle: [XXXX] [YYYY] + ctx.Add("MOV {}.x,{}.x;" + "MOV {}.y,{}.z;" + "MOV {}.z,{}.x;" + "MOV {}.w,{}.z;" + "MOV {}.x,{}.y;" + "MOV {}.y,{}.w;" + "MOV {}.z,{}.y;" + "MOV {}.w,{}.w;", + off_x, offsets_a, off_x, offsets_a, off_x, offsets_b, off_x, offsets_b, off_y, + offsets_a, off_y, offsets_a, off_y, offsets_b, off_y, offsets_b); +} + +std::string GradOffset(const IR::Value& offset) { + if (offset.IsImmediate()) { + LOG_WARNING(Shader_GLASM, "Gradient offset is a scalar immediate"); + return ""; + } + IR::Inst* const vector{offset.InstRecursive()}; + if (!vector->AreAllArgsImmediates()) { + LOG_WARNING(Shader_GLASM, "Gradient offset vector is not immediate"); + return ""; + } + switch (vector->NumArgs()) { + case 1: + return fmt::format(",({})", static_cast<s32>(vector->Arg(0).U32())); + case 2: + return fmt::format(",({},{})", static_cast<s32>(vector->Arg(0).U32()), + static_cast<s32>(vector->Arg(1).U32())); + default: + throw LogicError("Invalid number of gradient offsets {}", vector->NumArgs()); + } +} + +std::pair<std::string, ScopedRegister> Coord(EmitContext& ctx, const IR::Value& coord) { + if (coord.IsImmediate()) { + ScopedRegister scoped_reg(ctx.reg_alloc); + ctx.Add("MOV.U {}.x,{};", scoped_reg.reg, ScalarU32{ctx.reg_alloc.Consume(coord)}); + return {fmt::to_string(scoped_reg.reg), std::move(scoped_reg)}; + } + std::string coord_vec{fmt::to_string(Register{ctx.reg_alloc.Consume(coord)})}; + if (coord.InstRecursive()->HasUses()) { + // Move non-dead coords to a separate register, although this should never happen because + // vectors are only assembled for immediate texture instructions + ctx.Add("MOV.F RC,{};", coord_vec); + coord_vec = "RC"; + } + return {std::move(coord_vec), ScopedRegister{}}; +} + +void StoreSparse(EmitContext& ctx, IR::Inst* sparse_inst) { + if (!sparse_inst) { + return; + } + const Register sparse_ret{ctx.reg_alloc.Define(*sparse_inst)}; + ctx.Add("MOV.S {},-1;" + "MOV.S {}(NONRESIDENT),0;", + sparse_ret, sparse_ret); +} + +std::string_view FormatStorage(ImageFormat format) { + switch (format) { + case ImageFormat::Typeless: + return "U"; + case ImageFormat::R8_UINT: + return "U8"; + case ImageFormat::R8_SINT: + return "S8"; + case ImageFormat::R16_UINT: + return "U16"; + case ImageFormat::R16_SINT: + return "S16"; + case ImageFormat::R32_UINT: + return "U32"; + case ImageFormat::R32G32_UINT: + return "U32X2"; + case ImageFormat::R32G32B32A32_UINT: + return "U32X4"; + } + throw InvalidArgument("Invalid image format {}", format); +} + +template 
<typename T> +void ImageAtomic(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, T value, + std::string_view op) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const std::string_view type{TextureType(info)}; + const std::string image{Image(ctx, info, index)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("ATOMIM.{} {},{},{},{},{};", op, ret, value, coord, image, type); +} + +IR::Inst* PrepareSparse(IR::Inst& inst) { + const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + if (sparse_inst) { + sparse_inst->Invalidate(); + } + return sparse_inst; +} +} // Anonymous namespace + +void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, Register bias_lc, const IR::Value& offset) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; + const std::string_view lod_clamp_mod{info.has_lod_clamp ? ".LODCLAMP" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const std::string offset_vec{Offset(ctx, offset)}; + const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + if (info.has_bias) { + if (info.type == TextureType::ColorArrayCube) { + ctx.Add("TXB.F{}{} {},{},{},{},ARRAYCUBE{};", lod_clamp_mod, sparse_mod, ret, coord_vec, + bias_lc, texture, offset_vec); + } else { + if (info.has_lod_clamp) { + ctx.Add("MOV.F {}.w,{}.x;" + "TXB.F.LODCLAMP{} {},{},{}.y,{},{}{};", + coord_vec, bias_lc, sparse_mod, ret, coord_vec, bias_lc, texture, type, + offset_vec); + } else { + ctx.Add("MOV.F {}.w,{}.x;" + "TXB.F{} {},{},{},{}{};", + coord_vec, bias_lc, sparse_mod, ret, coord_vec, texture, type, offset_vec); + } + } + } else { + if (info.has_lod_clamp && info.type == TextureType::ColorArrayCube) { + ctx.Add("TEX.F.LODCLAMP{} {},{},{},{},ARRAYCUBE{};", sparse_mod, ret, coord_vec, + bias_lc, texture, offset_vec); + } else { + ctx.Add("TEX.F{}{} {},{},{},{}{};", lod_clamp_mod, sparse_mod, ret, coord_vec, texture, + type, offset_vec); + } + } + StoreSparse(ctx, sparse_inst); +} + +void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, ScalarF32 lod, const IR::Value& offset) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view sparse_mod{sparse_inst ? 
".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const std::string offset_vec{Offset(ctx, offset)}; + const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + if (info.type == TextureType::ColorArrayCube) { + ctx.Add("TXL.F{} {},{},{},{},ARRAYCUBE{};", sparse_mod, ret, coord_vec, lod, texture, + offset_vec); + } else { + ctx.Add("MOV.F {}.w,{};" + "TXL.F{} {},{},{},{}{};", + coord_vec, lod, sparse_mod, ret, coord_vec, texture, type, offset_vec); + } + StoreSparse(ctx, sparse_inst); +} + +void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& dref, + const IR::Value& bias_lc, const IR::Value& offset) { + // Allocate early to avoid aliases + const auto info{inst.Flags<IR::TextureInstInfo>()}; + ScopedRegister staging; + if (info.type == TextureType::ColorArrayCube) { + staging = ScopedRegister{ctx.reg_alloc}; + } + const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)}; + const Register bias_lc_vec{ctx.reg_alloc.Consume(bias_lc)}; + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const std::string offset_vec{Offset(ctx, offset)}; + const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + if (info.has_bias) { + if (info.has_lod_clamp) { + switch (info.type) { + case TextureType::Color1D: + case TextureType::ColorArray1D: + case TextureType::Color2D: + ctx.Add("MOV.F {}.z,{};" + "MOV.F {}.w,{}.x;" + "TXB.F.LODCLAMP{} {},{},{}.y,{},{}{};", + coord_vec, dref_val, coord_vec, bias_lc_vec, sparse_mod, ret, coord_vec, + bias_lc_vec, texture, type, offset_vec); + break; + case TextureType::ColorArray2D: + case TextureType::ColorCube: + ctx.Add("MOV.F {}.w,{};" + "TXB.F.LODCLAMP{} {},{},{},{},{}{};", + coord_vec, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, texture, type, + offset_vec); + break; + default: + throw NotImplementedException("Invalid type {} with bias and lod clamp", + info.type.Value()); + } + } else { + switch (info.type) { + case TextureType::Color1D: + case TextureType::ColorArray1D: + case TextureType::Color2D: + ctx.Add("MOV.F {}.z,{};" + "MOV.F {}.w,{}.x;" + "TXB.F{} {},{},{},{}{};", + coord_vec, dref_val, coord_vec, bias_lc_vec, sparse_mod, ret, coord_vec, + texture, type, offset_vec); + break; + case TextureType::ColorArray2D: + case TextureType::ColorCube: + ctx.Add("MOV.F {}.w,{};" + "TXB.F{} {},{},{},{},{}{};", + coord_vec, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, texture, type, + offset_vec); + break; + case TextureType::ColorArrayCube: + ctx.Add("MOV.F {}.x,{};" + "MOV.F {}.y,{}.x;" + "TXB.F{} {},{},{},{},{}{};", + staging.reg, dref_val, staging.reg, bias_lc_vec, sparse_mod, ret, coord_vec, + staging.reg, texture, type, offset_vec); + break; + default: + throw NotImplementedException("Invalid type {}", info.type.Value()); + } + } + } else { + if (info.has_lod_clamp) { + if (info.type != TextureType::ColorArrayCube) { + const bool w_swizzle{info.type == TextureType::ColorArray2D || + info.type == TextureType::ColorCube}; + const char dref_swizzle{w_swizzle ? 
'w' : 'z'}; + ctx.Add("MOV.F {}.{},{};" + "TEX.F.LODCLAMP{} {},{},{},{},{}{};", + coord_vec, dref_swizzle, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, + texture, type, offset_vec); + } else { + ctx.Add("MOV.F {}.x,{};" + "MOV.F {}.y,{};" + "TEX.F.LODCLAMP{} {},{},{},{},{}{};", + staging.reg, dref_val, staging.reg, bias_lc_vec, sparse_mod, ret, coord_vec, + staging.reg, texture, type, offset_vec); + } + } else { + if (info.type != TextureType::ColorArrayCube) { + const bool w_swizzle{info.type == TextureType::ColorArray2D || + info.type == TextureType::ColorCube}; + const char dref_swizzle{w_swizzle ? 'w' : 'z'}; + ctx.Add("MOV.F {}.{},{};" + "TEX.F{} {},{},{},{}{};", + coord_vec, dref_swizzle, dref_val, sparse_mod, ret, coord_vec, texture, + type, offset_vec); + } else { + ctx.Add("TEX.F{} {},{},{},{},{}{};", sparse_mod, ret, coord_vec, dref_val, texture, + type, offset_vec); + } + } + } + StoreSparse(ctx, sparse_inst); +} + +void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& dref, + const IR::Value& lod, const IR::Value& offset) { + // Allocate early to avoid aliases + const auto info{inst.Flags<IR::TextureInstInfo>()}; + ScopedRegister staging; + if (info.type == TextureType::ColorArrayCube) { + staging = ScopedRegister{ctx.reg_alloc}; + } + const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)}; + const ScalarF32 lod_val{ctx.reg_alloc.Consume(lod)}; + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const std::string offset_vec{Offset(ctx, offset)}; + const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + switch (info.type) { + case TextureType::Color1D: + case TextureType::ColorArray1D: + case TextureType::Color2D: + ctx.Add("MOV.F {}.z,{};" + "MOV.F {}.w,{};" + "TXL.F{} {},{},{},{}{};", + coord_vec, dref_val, coord_vec, lod_val, sparse_mod, ret, coord_vec, texture, type, + offset_vec); + break; + case TextureType::ColorArray2D: + case TextureType::ColorCube: + ctx.Add("MOV.F {}.w,{};" + "TXL.F{} {},{},{},{},{}{};", + coord_vec, dref_val, sparse_mod, ret, coord_vec, lod_val, texture, type, + offset_vec); + break; + case TextureType::ColorArrayCube: + ctx.Add("MOV.F {}.x,{};" + "MOV.F {}.y,{};" + "TXL.F{} {},{},{},{},{}{};", + staging.reg, dref_val, staging.reg, lod_val, sparse_mod, ret, coord_vec, + staging.reg, texture, type, offset_vec); + break; + default: + throw NotImplementedException("Invalid type {}", info.type.Value()); + } + StoreSparse(ctx, sparse_inst); +} + +void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2) { + // Allocate offsets early so they don't overwrite any consumed register + const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)}; + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const char comp{"xyzw"[info.gather_component]}; + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view sparse_mod{sparse_inst ? 
".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const Register coord_vec{ctx.reg_alloc.Consume(coord)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + if (offset2.IsEmpty()) { + const std::string offset_vec{Offset(ctx, offset)}; + ctx.Add("TXG.F{} {},{},{}.{},{}{};", sparse_mod, ret, coord_vec, texture, comp, type, + offset_vec); + } else { + SwizzleOffsets(ctx, off_x.reg, off_y.reg, offset, offset2); + ctx.Add("TXGO.F{} {},{},{},{},{}.{},{};", sparse_mod, ret, coord_vec, off_x.reg, off_y.reg, + texture, comp, type); + } + StoreSparse(ctx, sparse_inst); +} + +void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2, + const IR::Value& dref) { + // FIXME: This instruction is not working as expected + + // Allocate offsets early so they don't overwrite any consumed register + const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)}; + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const Register coord_vec{ctx.reg_alloc.Consume(coord)}; + const ScalarF32 dref_value{ctx.reg_alloc.Consume(dref)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + std::string args; + switch (info.type) { + case TextureType::Color2D: + ctx.Add("MOV.F {}.z,{};", coord_vec, dref_value); + args = fmt::to_string(coord_vec); + break; + case TextureType::ColorArray2D: + case TextureType::ColorCube: + ctx.Add("MOV.F {}.w,{};", coord_vec, dref_value); + args = fmt::to_string(coord_vec); + break; + case TextureType::ColorArrayCube: + args = fmt::format("{},{}", coord_vec, dref_value); + break; + default: + throw NotImplementedException("Invalid type {}", info.type.Value()); + } + if (offset2.IsEmpty()) { + const std::string offset_vec{Offset(ctx, offset)}; + ctx.Add("TXG.F{} {},{},{},{}{};", sparse_mod, ret, args, texture, type, offset_vec); + } else { + SwizzleOffsets(ctx, off_x.reg, off_y.reg, offset, offset2); + ctx.Add("TXGO.F{} {},{},{},{},{},{};", sparse_mod, ret, args, off_x.reg, off_y.reg, texture, + type); + } + StoreSparse(ctx, sparse_inst); +} + +void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view sparse_mod{sparse_inst ? 
".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const std::string offset_vec{Offset(ctx, offset)}; + const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + if (info.type == TextureType::Buffer) { + ctx.Add("TXF.F{} {},{},{},{}{};", sparse_mod, ret, coord_vec, texture, type, offset_vec); + } else if (ms.type != Type::Void) { + ctx.Add("MOV.S {}.w,{};" + "TXFMS.F{} {},{},{},{}{};", + coord_vec, ms, sparse_mod, ret, coord_vec, texture, type, offset_vec); + } else { + ctx.Add("MOV.S {}.w,{};" + "TXF.F{} {},{},{},{}{};", + coord_vec, lod, sparse_mod, ret, coord_vec, texture, type, offset_vec); + } + StoreSparse(ctx, sparse_inst); +} + +void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + ScalarS32 lod) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const std::string texture{Texture(ctx, info, index)}; + const std::string_view type{TextureType(info)}; + ctx.Add("TXQ {},{},{},{};", inst, lod, texture, type); +} + +void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const std::string texture{Texture(ctx, info, index)}; + const std::string_view type{TextureType(info)}; + ctx.Add("LOD.F {},{},{},{};", inst, coord, texture, type); +} + +void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& derivatives, + const IR::Value& offset, const IR::Value& lod_clamp) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + ScopedRegister dpdx, dpdy; + const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; + if (multi_component) { + // Allocate this early to avoid aliasing other registers + dpdx = ScopedRegister{ctx.reg_alloc}; + dpdy = ScopedRegister{ctx.reg_alloc}; + } + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const std::string offset_vec{GradOffset(offset)}; + const Register coord_vec{ctx.reg_alloc.Consume(coord)}; + const Register derivatives_vec{ctx.reg_alloc.Consume(derivatives)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + if (multi_component) { + ctx.Add("MOV.F {}.x,{}.x;" + "MOV.F {}.y,{}.z;" + "MOV.F {}.x,{}.y;" + "MOV.F {}.y,{}.w;", + dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec, + dpdy.reg, derivatives_vec); + if (info.has_lod_clamp) { + const ScalarF32 lod_clamp_value{ctx.reg_alloc.Consume(lod_clamp)}; + ctx.Add("MOV.F {}.w,{};" + "TXD.F.LODCLAMP{} {},{},{},{},{},{}{};", + dpdy.reg, lod_clamp_value, sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg, + texture, type, offset_vec); + } else { + ctx.Add("TXD.F{} {},{},{},{},{},{}{};", sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg, + texture, type, offset_vec); + } + } else { + ctx.Add("TXD.F{} {},{},{}.x,{}.y,{},{}{};", sparse_mod, ret, coord_vec, derivatives_vec, + derivatives_vec, texture, type, offset_vec); + } + StoreSparse(ctx, sparse_inst); +} + +void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view format{FormatStorage(info.image_format)}; + const std::string_view sparse_mod{sparse_inst ? 
".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string image{Image(ctx, info, index)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("LOADIM.{}{} {},{},{},{};", format, sparse_mod, ret, coord, image, type); + StoreSparse(ctx, sparse_inst); +} + +void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + Register color) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const std::string_view format{FormatStorage(info.image_format)}; + const std::string_view type{TextureType(info)}; + const std::string image{Image(ctx, info, index)}; + ctx.Add("STOREIM.{} {},{},{},{};", format, image, color, coord, type); +} + +void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "ADD.U32"); +} + +void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarS32 value) { + ImageAtomic(ctx, inst, index, coord, value, "MIN.S32"); +} + +void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "MIN.U32"); +} + +void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarS32 value) { + ImageAtomic(ctx, inst, index, coord, value, "MAX.S32"); +} + +void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "MAX.U32"); +} + +void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "IWRAP.U32"); +} + +void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "DWRAP.U32"); +} + +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "AND.U32"); +} + +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "OR.U32"); +} + +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "XOR.U32"); +} + +void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + Register coord, ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "EXCH.U32"); +} + +void EmitBindlessImageSampleImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageSampleExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageGather(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageGatherDref(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageFetch(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageQueryDimensions(EmitContext&) { + 
throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageQueryLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageGradient(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageRead(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageWrite(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageSampleImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageSampleExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageSampleDrefImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageSampleDrefExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageGather(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageGatherDref(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageFetch(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageQueryDimensions(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageQueryLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageGradient(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageRead(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageWrite(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicIAdd32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicSMin32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicUMin32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicSMax32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicUMax32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicInc32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicDec32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicAnd32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicOr32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicXor32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicExchange32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicIAdd32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicSMin32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicUMin32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicSMax32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicUMax32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicInc32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicDec32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicAnd32(EmitContext&) { + throw 
LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicOr32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicXor32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicExchange32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h new file mode 100644 index 000000000..12afda43b --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -0,0 +1,625 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" +#include "shader_recompiler/backend/glasm/reg_alloc.h" + +namespace Shader::IR { +enum class Attribute : u64; +enum class Patch : u64; +class Inst; +class Value; +} // namespace Shader::IR + +namespace Shader::Backend::GLASM { + +class EmitContext; + +// Microinstruction emitters +void EmitPhi(EmitContext& ctx, IR::Inst& inst); +void EmitVoid(EmitContext& ctx); +void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitReference(EmitContext&, const IR::Value& value); +void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value); +void EmitJoin(EmitContext& ctx); +void EmitDemoteToHelperInvocation(EmitContext& ctx); +void EmitBarrier(EmitContext& ctx); +void EmitWorkgroupMemoryBarrier(EmitContext& ctx); +void EmitDeviceMemoryBarrier(EmitContext& ctx); +void EmitPrologue(EmitContext& ctx); +void EmitEpilogue(EmitContext& ctx); +void EmitEmitVertex(EmitContext& ctx, ScalarS32 stream); +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); +void EmitGetRegister(EmitContext& ctx); +void EmitSetRegister(EmitContext& ctx); +void EmitGetPred(EmitContext& ctx); +void EmitSetPred(EmitContext& ctx); +void EmitSetGotoVariable(EmitContext& ctx); +void EmitGetGotoVariable(EmitContext& ctx); +void EmitSetIndirectBranchVariable(EmitContext& ctx); +void EmitGetIndirectBranchVariable(EmitContext& ctx); +void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); +void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); +void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); +void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); +void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); +void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); +void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); +void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, ScalarU32 vertex); +void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex); +void EmitSetAttributeIndexed(EmitContext& ctx, ScalarU32 offset, ScalarF32 value, ScalarU32 vertex); +void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch); +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, ScalarF32 value); +void 
EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, ScalarF32 value); +void EmitSetSampleMask(EmitContext& ctx, ScalarS32 value); +void EmitSetFragDepth(EmitContext& ctx, ScalarF32 value); +void EmitGetZFlag(EmitContext& ctx); +void EmitGetSFlag(EmitContext& ctx); +void EmitGetCFlag(EmitContext& ctx); +void EmitGetOFlag(EmitContext& ctx); +void EmitSetZFlag(EmitContext& ctx); +void EmitSetSFlag(EmitContext& ctx); +void EmitSetCFlag(EmitContext& ctx); +void EmitSetOFlag(EmitContext& ctx); +void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst); +void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst); +void EmitInvocationId(EmitContext& ctx, IR::Inst& inst); +void EmitSampleId(EmitContext& ctx, IR::Inst& inst); +void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst); +void EmitYDirection(EmitContext& ctx, IR::Inst& inst); +void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset); +void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value); +void EmitUndefU1(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU8(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU16(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU32(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU64(EmitContext& ctx, IR::Inst& inst); +void EmitLoadGlobalU8(EmitContext& ctx, IR::Inst& inst, Register address); +void EmitLoadGlobalS8(EmitContext& ctx, IR::Inst& inst, Register address); +void EmitLoadGlobalU16(EmitContext& ctx, IR::Inst& inst, Register address); +void EmitLoadGlobalS16(EmitContext& ctx, IR::Inst& inst, Register address); +void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, Register address); +void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, Register address); +void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, Register address); +void EmitWriteGlobalU8(EmitContext& ctx, Register address, Register value); +void EmitWriteGlobalS8(EmitContext& ctx, Register address, Register value); +void EmitWriteGlobalU16(EmitContext& ctx, Register address, Register value); +void EmitWriteGlobalS16(EmitContext& ctx, Register address, Register value); +void EmitWriteGlobal32(EmitContext& ctx, Register address, ScalarU32 value); +void EmitWriteGlobal64(EmitContext& ctx, Register address, Register value); +void EmitWriteGlobal128(EmitContext& ctx, Register address, Register value); +void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset); +void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset); +void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset); +void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset); +void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset); +void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset); +void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset); +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarU32 value); +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarS32 value); +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarU32 value); +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarS32 value); +void 
EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarU32 value); +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + Register value); +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + Register value); +void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); +void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); +void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); +void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); +void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); +void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); +void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); +void EmitWriteSharedU8(EmitContext& ctx, ScalarU32 offset, ScalarU32 value); +void EmitWriteSharedU16(EmitContext& ctx, ScalarU32 offset, ScalarU32 value); +void EmitWriteSharedU32(EmitContext& ctx, ScalarU32 offset, ScalarU32 value); +void EmitWriteSharedU64(EmitContext& ctx, ScalarU32 offset, Register value); +void EmitWriteSharedU128(EmitContext& ctx, ScalarU32 offset, Register value); +void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2); +void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2, const IR::Value& e3); +void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2, const IR::Value& e3, const IR::Value& e4); +void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); +void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); +void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); +void EmitCompositeInsertU32x2(EmitContext& ctx, Register composite, ScalarU32 object, u32 index); +void EmitCompositeInsertU32x3(EmitContext& ctx, Register composite, ScalarU32 object, u32 index); +void EmitCompositeInsertU32x4(EmitContext& ctx, Register composite, ScalarU32 object, u32 index); +void EmitCompositeConstructF16x2(EmitContext& ctx, Register e1, Register e2); +void EmitCompositeConstructF16x3(EmitContext& ctx, Register e1, Register e2, Register e3); +void EmitCompositeConstructF16x4(EmitContext& ctx, Register e1, Register e2, Register e3, + Register e4); +void EmitCompositeExtractF16x2(EmitContext& ctx, Register composite, u32 index); +void EmitCompositeExtractF16x3(EmitContext& ctx, Register composite, u32 index); +void EmitCompositeExtractF16x4(EmitContext& ctx, Register composite, u32 index); +void EmitCompositeInsertF16x2(EmitContext& ctx, Register composite, Register object, u32 index); +void EmitCompositeInsertF16x3(EmitContext& ctx, Register composite, Register object, u32 index); +void EmitCompositeInsertF16x4(EmitContext& ctx, Register composite, Register object, u32 index); +void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2); +void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2, const IR::Value& e3); +void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2, const IR::Value& e3, const IR::Value& e4); +void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 
index); +void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); +void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); +void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, + ScalarF32 object, u32 index); +void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, + ScalarF32 object, u32 index); +void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, + ScalarF32 object, u32 index); +void EmitCompositeConstructF64x2(EmitContext& ctx); +void EmitCompositeConstructF64x3(EmitContext& ctx); +void EmitCompositeConstructF64x4(EmitContext& ctx); +void EmitCompositeExtractF64x2(EmitContext& ctx); +void EmitCompositeExtractF64x3(EmitContext& ctx); +void EmitCompositeExtractF64x4(EmitContext& ctx); +void EmitCompositeInsertF64x2(EmitContext& ctx, Register composite, Register object, u32 index); +void EmitCompositeInsertF64x3(EmitContext& ctx, Register composite, Register object, u32 index); +void EmitCompositeInsertF64x4(EmitContext& ctx, Register composite, Register object, u32 index); +void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, + ScalarS32 false_value); +void EmitSelectU8(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value); +void EmitSelectU16(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value); +void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, + ScalarS32 false_value); +void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, Register true_value, + Register false_value); +void EmitSelectF16(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value); +void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, + ScalarS32 false_value); +void EmitSelectF64(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value); +void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitPackFloat2x16(EmitContext& ctx, Register value); +void EmitUnpackFloat2x16(EmitContext& ctx, Register value); +void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitGetZeroFromOp(EmitContext& ctx); +void EmitGetSignFromOp(EmitContext& ctx); +void EmitGetCarryFromOp(EmitContext& ctx); +void EmitGetOverflowFromOp(EmitContext& ctx); +void EmitGetSparseFromOp(EmitContext& ctx); +void EmitGetInBoundsFromOp(EmitContext& ctx); +void EmitFPAbs16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPAbs64(EmitContext& ctx, IR::Inst& 
inst, ScalarF64 value); +void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, Register a, Register b); +void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); +void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); +void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, Register a, Register b, Register c); +void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b, ScalarF32 c); +void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b, ScalarF64 c); +void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); +void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); +void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); +void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); +void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, Register a, Register b); +void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); +void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); +void EmitFPNeg16(EmitContext& ctx, Register value); +void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, ScalarRegister value); +void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitFPSin(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPCos(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPRecip64(EmitContext& ctx, Register value); +void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPRecipSqrt64(EmitContext& ctx, Register value); +void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPSaturate16(EmitContext& ctx, Register value); +void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPSaturate64(EmitContext& ctx, Register value); +void EmitFPClamp16(EmitContext& ctx, Register value, Register min_value, Register max_value); +void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value, ScalarF32 min_value, + ScalarF32 max_value); +void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value, ScalarF64 min_value, + ScalarF64 max_value); +void EmitFPRoundEven16(EmitContext& ctx, Register value); +void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitFPFloor16(EmitContext& ctx, Register value); +void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitFPCeil16(EmitContext& ctx, Register value); +void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitFPTrunc16(EmitContext& ctx, Register value); +void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitFPOrdEqual16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPUnordEqual16(EmitContext& ctx, Register lhs, Register rhs); +void 
EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPOrdNotEqual16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPUnordNotEqual16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPOrdLessThan16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPUnordLessThan16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPOrdGreaterThan16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPUnordGreaterThan16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPOrdLessThanEqual16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPUnordLessThanEqual16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPIsNan16(EmitContext& ctx, Register value); +void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b); +void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b); +void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitINeg64(EmitContext& ctx, IR::Inst& inst, 
Register value); +void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift); +void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, ScalarU32 shift); +void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift); +void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, + ScalarU32 shift); +void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 shift); +void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, + ScalarS32 shift); +void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 insert, + ScalarS32 offset, ScalarS32 count); +void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 offset, + ScalarS32 count); +void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset, + ScalarU32 count); +void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); +void EmitSMin32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b); +void EmitSMax32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b); +void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value, ScalarS32 min, ScalarS32 max); +void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 min, ScalarU32 max); +void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); +void EmitULessThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs); +void EmitIEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); +void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); +void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs); +void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); +void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs); +void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); +void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); +void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs); +void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarS32 value); +void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarS32 value); +void EmitSharedAtomicUMax32(EmitContext& ctx, 
IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + Register value); +void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarS32 value); +void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarS32 value); +void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarF32 value); +void EmitStorageAtomicAddF16x2(EmitContext& ctx, 
IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitGlobalAtomicIAdd32(EmitContext& ctx); +void EmitGlobalAtomicSMin32(EmitContext& ctx); +void EmitGlobalAtomicUMin32(EmitContext& ctx); +void EmitGlobalAtomicSMax32(EmitContext& ctx); +void EmitGlobalAtomicUMax32(EmitContext& ctx); +void EmitGlobalAtomicInc32(EmitContext& ctx); +void EmitGlobalAtomicDec32(EmitContext& ctx); +void EmitGlobalAtomicAnd32(EmitContext& ctx); +void EmitGlobalAtomicOr32(EmitContext& ctx); +void EmitGlobalAtomicXor32(EmitContext& ctx); +void EmitGlobalAtomicExchange32(EmitContext& ctx); +void EmitGlobalAtomicIAdd64(EmitContext& ctx); +void EmitGlobalAtomicSMin64(EmitContext& ctx); +void EmitGlobalAtomicUMin64(EmitContext& ctx); +void EmitGlobalAtomicSMax64(EmitContext& ctx); +void EmitGlobalAtomicUMax64(EmitContext& ctx); +void EmitGlobalAtomicInc64(EmitContext& ctx); +void EmitGlobalAtomicDec64(EmitContext& ctx); +void EmitGlobalAtomicAnd64(EmitContext& ctx); +void EmitGlobalAtomicOr64(EmitContext& ctx); +void EmitGlobalAtomicXor64(EmitContext& ctx); +void EmitGlobalAtomicExchange64(EmitContext& ctx); +void EmitGlobalAtomicAddF32(EmitContext& ctx); +void EmitGlobalAtomicAddF16x2(EmitContext& ctx); +void EmitGlobalAtomicAddF32x2(EmitContext& ctx); +void EmitGlobalAtomicMinF16x2(EmitContext& ctx); +void EmitGlobalAtomicMinF32x2(EmitContext& ctx); +void EmitGlobalAtomicMaxF16x2(EmitContext& ctx); +void EmitGlobalAtomicMaxF32x2(EmitContext& ctx); +void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitConvertU32F64(EmitContext& ctx, IR::Inst& 
inst, ScalarF64 value); +void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); +void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); +void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); +void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); +void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitBindlessImageSampleImplicitLod(EmitContext&); +void EmitBindlessImageSampleExplicitLod(EmitContext&); +void EmitBindlessImageSampleDrefImplicitLod(EmitContext&); +void EmitBindlessImageSampleDrefExplicitLod(EmitContext&); +void EmitBindlessImageGather(EmitContext&); +void EmitBindlessImageGatherDref(EmitContext&); +void EmitBindlessImageFetch(EmitContext&); +void EmitBindlessImageQueryDimensions(EmitContext&); +void EmitBindlessImageQueryLod(EmitContext&); +void EmitBindlessImageGradient(EmitContext&); +void EmitBindlessImageRead(EmitContext&); +void EmitBindlessImageWrite(EmitContext&); +void EmitBoundImageSampleImplicitLod(EmitContext&); +void EmitBoundImageSampleExplicitLod(EmitContext&); +void EmitBoundImageSampleDrefImplicitLod(EmitContext&); +void EmitBoundImageSampleDrefExplicitLod(EmitContext&); +void EmitBoundImageGather(EmitContext&); +void EmitBoundImageGatherDref(EmitContext&); +void EmitBoundImageFetch(EmitContext&); +void EmitBoundImageQueryDimensions(EmitContext&); +void EmitBoundImageQueryLod(EmitContext&); 
+void EmitBoundImageGradient(EmitContext&); +void EmitBoundImageRead(EmitContext&); +void EmitBoundImageWrite(EmitContext&); +void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, Register bias_lc, const IR::Value& offset); +void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, ScalarF32 lod, const IR::Value& offset); +void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& dref, + const IR::Value& bias_lc, const IR::Value& offset); +void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& dref, + const IR::Value& lod, const IR::Value& offset); +void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2); +void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2, + const IR::Value& dref); +void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms); +void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + ScalarS32 lod); +void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord); +void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& derivatives, + const IR::Value& offset, const IR::Value& lod_clamp); +void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord); +void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + Register color); +void EmitBindlessImageAtomicIAdd32(EmitContext&); +void EmitBindlessImageAtomicSMin32(EmitContext&); +void EmitBindlessImageAtomicUMin32(EmitContext&); +void EmitBindlessImageAtomicSMax32(EmitContext&); +void EmitBindlessImageAtomicUMax32(EmitContext&); +void EmitBindlessImageAtomicInc32(EmitContext&); +void EmitBindlessImageAtomicDec32(EmitContext&); +void EmitBindlessImageAtomicAnd32(EmitContext&); +void EmitBindlessImageAtomicOr32(EmitContext&); +void EmitBindlessImageAtomicXor32(EmitContext&); +void EmitBindlessImageAtomicExchange32(EmitContext&); +void EmitBoundImageAtomicIAdd32(EmitContext&); +void EmitBoundImageAtomicSMin32(EmitContext&); +void EmitBoundImageAtomicUMin32(EmitContext&); +void EmitBoundImageAtomicSMax32(EmitContext&); +void EmitBoundImageAtomicUMax32(EmitContext&); +void EmitBoundImageAtomicInc32(EmitContext&); +void EmitBoundImageAtomicDec32(EmitContext&); +void EmitBoundImageAtomicAnd32(EmitContext&); +void EmitBoundImageAtomicOr32(EmitContext&); +void EmitBoundImageAtomicXor32(EmitContext&); +void EmitBoundImageAtomicExchange32(EmitContext&); +void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value); +void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarS32 value); +void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value); +void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarS32 value); +void 
EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value); +void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value); +void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value); +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value); +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value); +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value); +void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + Register coord, ScalarU32 value); +void EmitLaneId(EmitContext& ctx, IR::Inst& inst); +void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred); +void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred); +void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred); +void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred); +void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst); +void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + const IR::Value& clamp, const IR::Value& segmentation_mask); +void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + const IR::Value& clamp, const IR::Value& segmentation_mask); +void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + const IR::Value& clamp, const IR::Value& segmentation_mask); +void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + const IR::Value& clamp, const IR::Value& segmentation_mask); +void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 op_b, + ScalarU32 swizzle); +void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); +void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); +void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); +void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp new file mode 100644 index 000000000..f55c26b76 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp @@ -0,0 +1,294 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { +namespace { +void BitwiseLogicalOp(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b, + std::string_view lop) { + const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp); + const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp); + if (zero) { + zero->Invalidate(); + } + if (sign) { + sign->Invalidate(); + } + if (zero || sign) { + ctx.reg_alloc.InvalidateConditionCodes(); + } + const auto ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("{}.S {}.x,{},{};", lop, ret, a, b); + if (zero) { + ctx.Add("SEQ.S {},{},0;", *zero, ret); + } + if (sign) { + ctx.Add("SLT.S {},{},0;", *sign, ret); + } +} +} // Anonymous namespace + +void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + const std::array flags{ + inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp), + inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp), + inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp), + inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp), + }; + for (IR::Inst* const flag_inst : flags) { + if (flag_inst) { + flag_inst->Invalidate(); + } + } + const bool cc{inst.HasAssociatedPseudoOperation()}; + const std::string_view cc_mod{cc ? ".CC" : ""}; + if (cc) { + ctx.reg_alloc.InvalidateConditionCodes(); + } + const auto ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("ADD.S{} {}.x,{},{};", cc_mod, ret, a, b); + if (!cc) { + return; + } + static constexpr std::array<std::string_view, 4> masks{"", "SF", "CF", "OF"}; + for (size_t flag_index = 0; flag_index < flags.size(); ++flag_index) { + if (!flags[flag_index]) { + continue; + } + const auto flag_ret{ctx.reg_alloc.Define(*flags[flag_index])}; + if (flag_index == 0) { + ctx.Add("SEQ.S {}.x,{}.x,0;", flag_ret, ret); + } else { + // We could use conditional execution here, but it's broken on Nvidia's compiler + ctx.Add("IF {}.x;" + "MOV.S {}.x,-1;" + "ELSE;" + "MOV.S {}.x,0;" + "ENDIF;", + masks[flag_index], flag_ret, flag_ret); + } + } +} + +void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b) { + ctx.LongAdd("ADD.S64 {}.x,{}.x,{}.x;", inst, a, b); +} + +void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("SUB.S {}.x,{},{};", inst, a, b); +} + +void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b) { + ctx.LongAdd("SUB.S64 {}.x,{}.x,{}.x;", inst, a, b); +} + +void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("MUL.S {}.x,{},{};", inst, a, b); +} + +void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + if (value.type != Type::Register && static_cast<s32>(value.imm_u32) < 0) { + ctx.Add("MOV.S {},{};", inst, -static_cast<s32>(value.imm_u32)); + } else { + ctx.Add("MOV.S {},-{};", inst, value); + } +} + +void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value) { + ctx.LongAdd("MOV.S64 {},-{};", inst, value); +} + +void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + ctx.Add("ABS.S {},{};", inst, value); +} + +void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift) { + ctx.Add("SHL.U {}.x,{},{};", inst, base, shift); +} + +void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, + ScalarU32 shift) { + ctx.LongAdd("SHL.U64 {}.x,{},{};", 
inst, base, shift); +} + +void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift) { + ctx.Add("SHR.U {}.x,{},{};", inst, base, shift); +} + +void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, + ScalarU32 shift) { + ctx.LongAdd("SHR.U64 {}.x,{},{};", inst, base, shift); +} + +void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 shift) { + ctx.Add("SHR.S {}.x,{},{};", inst, base, shift); +} + +void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, + ScalarS32 shift) { + ctx.LongAdd("SHR.S64 {}.x,{},{};", inst, base, shift); +} + +void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + BitwiseLogicalOp(ctx, inst, a, b, "AND"); +} + +void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + BitwiseLogicalOp(ctx, inst, a, b, "OR"); +} + +void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + BitwiseLogicalOp(ctx, inst, a, b, "XOR"); +} + +void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 insert, + ScalarS32 offset, ScalarS32 count) { + const Register ret{ctx.reg_alloc.Define(inst)}; + if (count.type != Type::Register && offset.type != Type::Register) { + ctx.Add("BFI.S {},{{{},{},0,0}},{},{};", ret, count, offset, insert, base); + } else { + ctx.Add("MOV.S RC.x,{};" + "MOV.S RC.y,{};" + "BFI.S {},RC,{},{};", + count, offset, ret, insert, base); + } +} + +void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 offset, + ScalarS32 count) { + const Register ret{ctx.reg_alloc.Define(inst)}; + if (count.type != Type::Register && offset.type != Type::Register) { + ctx.Add("BFE.S {},{{{},{},0,0}},{};", ret, count, offset, base); + } else { + ctx.Add("MOV.S RC.x,{};" + "MOV.S RC.y,{};" + "BFE.S {},RC,{};", + count, offset, ret, base); + } +} + +void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset, + ScalarU32 count) { + const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp); + const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp); + if (zero) { + zero->Invalidate(); + } + if (sign) { + sign->Invalidate(); + } + if (zero || sign) { + ctx.reg_alloc.InvalidateConditionCodes(); + } + const Register ret{ctx.reg_alloc.Define(inst)}; + if (count.type != Type::Register && offset.type != Type::Register) { + ctx.Add("BFE.U {},{{{},{},0,0}},{};", ret, count, offset, base); + } else { + ctx.Add("MOV.U RC.x,{};" + "MOV.U RC.y,{};" + "BFE.U {},RC,{};", + count, offset, ret, base); + } + if (zero) { + ctx.Add("SEQ.S {},{},0;", *zero, ret); + } + if (sign) { + ctx.Add("SLT.S {},{},0;", *sign, ret); + } +} + +void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + ctx.Add("BFR {},{};", inst, value); +} + +void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + ctx.Add("BTC {},{};", inst, value); +} + +void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + ctx.Add("NOT.S {},{};", inst, value); +} + +void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + ctx.Add("BTFM.S {},{};", inst, value); +} + +void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { + ctx.Add("BTFM.U {},{};", inst, value); +} + +void EmitSMin32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("MIN.S {},{},{};", inst, a, b); +} + +void 
EmitUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) { + ctx.Add("MIN.U {},{},{};", inst, a, b); +} + +void EmitSMax32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("MAX.S {},{},{};", inst, a, b); +} + +void EmitUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) { + ctx.Add("MAX.U {},{},{};", inst, a, b); +} + +void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value, ScalarS32 min, ScalarS32 max) { + const Register ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("MIN.S RC.x,{},{};" + "MAX.S {}.x,RC.x,{};", + max, value, ret, min); +} + +void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 min, ScalarU32 max) { + const Register ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("MIN.U RC.x,{},{};" + "MAX.U {}.x,RC.x,{};", + max, value, ret, min); +} + +void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { + ctx.Add("SLT.S {}.x,{},{};", inst, lhs, rhs); +} + +void EmitULessThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) { + ctx.Add("SLT.U {}.x,{},{};", inst, lhs, rhs); +} + +void EmitIEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { + ctx.Add("SEQ.S {}.x,{},{};", inst, lhs, rhs); +} + +void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { + ctx.Add("SLE.S {}.x,{},{};", inst, lhs, rhs); +} + +void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) { + ctx.Add("SLE.U {}.x,{},{};", inst, lhs, rhs); +} + +void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { + ctx.Add("SGT.S {}.x,{},{};", inst, lhs, rhs); +} + +void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) { + ctx.Add("SGT.U {}.x,{},{};", inst, lhs, rhs); +} + +void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { + ctx.Add("SNE.U {}.x,{},{};", inst, lhs, rhs); +} + +void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { + ctx.Add("SGE.S {}.x,{},{};", inst, lhs, rhs); +} + +void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) { + ctx.Add("SGE.U {}.x,{},{};", inst, lhs, rhs); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp new file mode 100644 index 000000000..af9fac7c1 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp @@ -0,0 +1,568 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <string_view> + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/runtime_info.h" + +namespace Shader::Backend::GLASM { +namespace { +void StorageOp(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + std::string_view then_expr, std::string_view else_expr = {}) { + // Operate on bindless SSBO, call the expression with bounds checking + // address = c[binding].xy + // length = c[binding].z + const u32 sb_binding{binding.U32()}; + ctx.Add("PK64.U DC,c[{}];" // pointer = address + "CVT.U64.U32 DC.z,{};" // offset = uint64_t(offset) + "ADD.U64 DC.x,DC.x,DC.z;" // pointer += offset + "SLT.U.CC RC.x,{},c[{}].z;", // cc = offset < length + sb_binding, offset, offset, sb_binding); + if (else_expr.empty()) { + ctx.Add("IF NE.x;{}ENDIF;", then_expr); + } else { + ctx.Add("IF NE.x;{}ELSE;{}ENDIF;", then_expr, else_expr); + } +} + +void GlobalStorageOp(EmitContext& ctx, Register address, bool pointer_based, std::string_view expr, + std::string_view else_expr = {}) { + const size_t num_buffers{ctx.info.storage_buffers_descriptors.size()}; + for (size_t index = 0; index < num_buffers; ++index) { + if (!ctx.info.nvn_buffer_used[index]) { + continue; + } + const auto& ssbo{ctx.info.storage_buffers_descriptors[index]}; + ctx.Add("LDC.U64 DC.x,c{}[{}];" // ssbo_addr + "LDC.U32 RC.x,c{}[{}];" // ssbo_size_u32 + "CVT.U64.U32 DC.y,RC.x;" // ssbo_size = ssbo_size_u32 + "ADD.U64 DC.y,DC.y,DC.x;" // ssbo_end = ssbo_addr + ssbo_size + "SGE.U64 RC.x,{}.x,DC.x;" // a = input_addr >= ssbo_addr ? -1 : 0 + "SLT.U64 RC.y,{}.x,DC.y;" // b = input_addr < ssbo_end ? 
-1 : 0 + "AND.U.CC RC.x,RC.x,RC.y;" // cond = a && b + "IF NE.x;" // if cond + "SUB.U64 DC.x,{}.x,DC.x;", // offset = input_addr - ssbo_addr + ssbo.cbuf_index, ssbo.cbuf_offset, ssbo.cbuf_index, ssbo.cbuf_offset + 8, address, + address, address); + if (pointer_based) { + ctx.Add("PK64.U DC.y,c[{}];" // host_ssbo = cbuf + "ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset + "{}" + "ELSE;", + index, expr); + } else { + ctx.Add("CVT.U32.U64 RC.x,DC.x;" + "{},ssbo{}[RC.x];" + "ELSE;", + expr, index); + } + } + if (!else_expr.empty()) { + ctx.Add("{}", else_expr); + } + const size_t num_used_buffers{ctx.info.nvn_buffer_used.count()}; + for (size_t index = 0; index < num_used_buffers; ++index) { + ctx.Add("ENDIF;"); + } +} + +template <typename ValueType> +void Write(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, ValueType value, + std::string_view size) { + if (ctx.runtime_info.glasm_use_storage_buffers) { + ctx.Add("STB.{} {},ssbo{}[{}];", size, value, binding.U32(), offset); + } else { + StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},DC.x;", size, value)); + } +} + +void Load(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, + std::string_view size) { + const Register ret{ctx.reg_alloc.Define(inst)}; + if (ctx.runtime_info.glasm_use_storage_buffers) { + ctx.Add("LDB.{} {},ssbo{}[{}];", size, ret, binding.U32(), offset); + } else { + StorageOp(ctx, binding, offset, fmt::format("LOAD.{} {},DC.x;", size, ret), + fmt::format("MOV.U {},{{0,0,0,0}};", ret)); + } +} + +template <typename ValueType> +void GlobalWrite(EmitContext& ctx, Register address, ValueType value, std::string_view size) { + if (ctx.runtime_info.glasm_use_storage_buffers) { + GlobalStorageOp(ctx, address, false, fmt::format("STB.{} {}", size, value)); + } else { + GlobalStorageOp(ctx, address, true, fmt::format("STORE.{} {},DC.x;", size, value)); + } +} + +void GlobalLoad(EmitContext& ctx, IR::Inst& inst, Register address, std::string_view size) { + const Register ret{ctx.reg_alloc.Define(inst)}; + if (ctx.runtime_info.glasm_use_storage_buffers) { + GlobalStorageOp(ctx, address, false, fmt::format("LDB.{} {}", size, ret)); + } else { + GlobalStorageOp(ctx, address, true, fmt::format("LOAD.{} {},DC.x;", size, ret), + fmt::format("MOV.S {},0;", ret)); + } +} + +template <typename ValueType> +void Atom(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, + ValueType value, std::string_view operation, std::string_view size) { + const Register ret{ctx.reg_alloc.Define(inst)}; + if (ctx.runtime_info.glasm_use_storage_buffers) { + ctx.Add("ATOMB.{}.{} {},{},ssbo{}[{}];", operation, size, ret, value, binding.U32(), + offset); + } else { + StorageOp(ctx, binding, offset, + fmt::format("ATOM.{}.{} {},{},DC.x;", operation, size, ret, value)); + } +} +} // Anonymous namespace + +void EmitLoadGlobalU8(EmitContext& ctx, IR::Inst& inst, Register address) { + GlobalLoad(ctx, inst, address, "U8"); +} + +void EmitLoadGlobalS8(EmitContext& ctx, IR::Inst& inst, Register address) { + GlobalLoad(ctx, inst, address, "S8"); +} + +void EmitLoadGlobalU16(EmitContext& ctx, IR::Inst& inst, Register address) { + GlobalLoad(ctx, inst, address, "U16"); +} + +void EmitLoadGlobalS16(EmitContext& ctx, IR::Inst& inst, Register address) { + GlobalLoad(ctx, inst, address, "S16"); +} + +void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, Register address) { + GlobalLoad(ctx, inst, address, "U32"); +} + +void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, Register address) { 
+ GlobalLoad(ctx, inst, address, "U32X2"); +} + +void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, Register address) { + GlobalLoad(ctx, inst, address, "U32X4"); +} + +void EmitWriteGlobalU8(EmitContext& ctx, Register address, Register value) { + GlobalWrite(ctx, address, value, "U8"); +} + +void EmitWriteGlobalS8(EmitContext& ctx, Register address, Register value) { + GlobalWrite(ctx, address, value, "S8"); +} + +void EmitWriteGlobalU16(EmitContext& ctx, Register address, Register value) { + GlobalWrite(ctx, address, value, "U16"); +} + +void EmitWriteGlobalS16(EmitContext& ctx, Register address, Register value) { + GlobalWrite(ctx, address, value, "S16"); +} + +void EmitWriteGlobal32(EmitContext& ctx, Register address, ScalarU32 value) { + GlobalWrite(ctx, address, value, "U32"); +} + +void EmitWriteGlobal64(EmitContext& ctx, Register address, Register value) { + GlobalWrite(ctx, address, value, "U32X2"); +} + +void EmitWriteGlobal128(EmitContext& ctx, Register address, Register value) { + GlobalWrite(ctx, address, value, "U32X4"); +} + +void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset) { + Load(ctx, inst, binding, offset, "U8"); +} + +void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset) { + Load(ctx, inst, binding, offset, "S8"); +} + +void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset) { + Load(ctx, inst, binding, offset, "U16"); +} + +void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset) { + Load(ctx, inst, binding, offset, "S16"); +} + +void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset) { + Load(ctx, inst, binding, offset, "U32"); +} + +void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset) { + Load(ctx, inst, binding, offset, "U32X2"); +} + +void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset) { + Load(ctx, inst, binding, offset, "U32X4"); +} + +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarU32 value) { + Write(ctx, binding, offset, value, "U8"); +} + +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarS32 value) { + Write(ctx, binding, offset, value, "S8"); +} + +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarU32 value) { + Write(ctx, binding, offset, value, "U16"); +} + +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarS32 value) { + Write(ctx, binding, offset, value, "S16"); +} + +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarU32 value) { + Write(ctx, binding, offset, value, "U32"); +} + +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + Register value) { + Write(ctx, binding, offset, value, "U32X2"); +} + +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + Register value) { + Write(ctx, binding, offset, value, "U32X4"); +} + +void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.ADD.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarS32 
value) { + ctx.Add("ATOMS.MIN.S32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.MIN.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarS32 value) { + ctx.Add("ATOMS.MAX.S32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.MAX.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.IWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.DWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.AND.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.OR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.XOR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.EXCH.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + Register value) { + ctx.LongAdd("ATOMS.EXCH.U64 {}.x,{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "ADD", "U32"); +} + +void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarS32 value) { + Atom(ctx, inst, binding, offset, value, "MIN", "S32"); +} + +void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "MIN", "U32"); +} + +void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarS32 value) { + Atom(ctx, inst, binding, offset, value, "MAX", "S32"); +} + +void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "MAX", "U32"); +} + +void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "IWRAP", "U32"); +} + +void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "DWRAP", "U32"); +} + +void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, 
"AND", "U32"); +} + +void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "OR", "U32"); +} + +void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "XOR", "U32"); +} + +void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "EXCH", "U32"); +} + +void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "ADD", "U64"); +} + +void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MIN", "S64"); +} + +void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MIN", "U64"); +} + +void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MAX", "S64"); +} + +void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MAX", "U64"); +} + +void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "AND", "U64"); +} + +void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "OR", "U64"); +} + +void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "XOR", "U64"); +} + +void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "EXCH", "U64"); +} + +void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarF32 value) { + Atom(ctx, inst, binding, offset, value, "ADD", "F32"); +} + +void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "ADD", "F16x2"); +} + +void EmitStorageAtomicAddF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MIN", "F16x2"); +} + +void EmitStorageAtomicMinF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, 
Register value) { + Atom(ctx, inst, binding, offset, value, "MAX", "F16x2"); +} + +void EmitStorageAtomicMaxF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicIAdd32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicSMin32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicUMin32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicSMax32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicUMax32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicInc32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicDec32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAnd32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicOr32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicXor32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicExchange32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicIAdd64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicSMin64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicUMin64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicSMax64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicUMax64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicInc64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicDec64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAnd64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicOr64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicXor64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicExchange64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAddF32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAddF16x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAddF32x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicMinF16x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicMinF32x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicMaxF16x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicMaxF32x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp 
b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp new file mode 100644 index 000000000..ff64c6924 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -0,0 +1,273 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/ir/value.h" + +#ifdef _MSC_VER +#pragma warning(disable : 4100) +#endif + +namespace Shader::Backend::GLASM { + +#define NotImplemented() throw NotImplementedException("GLASM instruction {}", __LINE__) + +static void DefinePhi(EmitContext& ctx, IR::Inst& phi) { + switch (phi.Arg(0).Type()) { + case IR::Type::U1: + case IR::Type::U32: + case IR::Type::F32: + ctx.reg_alloc.Define(phi); + break; + case IR::Type::U64: + case IR::Type::F64: + ctx.reg_alloc.LongDefine(phi); + break; + default: + throw NotImplementedException("Phi node type {}", phi.Type()); + } +} + +void EmitPhi(EmitContext& ctx, IR::Inst& phi) { + const size_t num_args{phi.NumArgs()}; + for (size_t i = 0; i < num_args; ++i) { + ctx.reg_alloc.Consume(phi.Arg(i)); + } + if (!phi.Definition<Id>().is_valid) { + // The phi node wasn't forward defined + DefinePhi(ctx, phi); + } +} + +void EmitVoid(EmitContext&) {} + +void EmitReference(EmitContext& ctx, const IR::Value& value) { + ctx.reg_alloc.Consume(value); +} + +void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& value) { + IR::Inst& phi{RegAlloc::AliasInst(*phi_value.Inst())}; + if (!phi.Definition<Id>().is_valid) { + // The phi node wasn't forward defined + DefinePhi(ctx, phi); + } + const Register phi_reg{ctx.reg_alloc.Consume(IR::Value{&phi})}; + const Value eval_value{ctx.reg_alloc.Consume(value)}; + + if (phi_reg == eval_value) { + return; + } + switch (phi.Flags<IR::Type>()) { + case IR::Type::U1: + case IR::Type::U32: + case IR::Type::F32: + ctx.Add("MOV.S {}.x,{};", phi_reg, ScalarS32{eval_value}); + break; + case IR::Type::U64: + case IR::Type::F64: + ctx.Add("MOV.U64 {}.x,{};", phi_reg, ScalarRegister{eval_value}); + break; + default: + throw NotImplementedException("Phi node type {}", phi.Type()); + } +} + +void EmitJoin(EmitContext& ctx) { + NotImplemented(); +} + +void EmitDemoteToHelperInvocation(EmitContext& ctx) { + ctx.Add("KIL TR.x;"); +} + +void EmitBarrier(EmitContext& ctx) { + ctx.Add("BAR;"); +} + +void EmitWorkgroupMemoryBarrier(EmitContext& ctx) { + ctx.Add("MEMBAR.CTA;"); +} + +void EmitDeviceMemoryBarrier(EmitContext& ctx) { + ctx.Add("MEMBAR;"); +} + +void EmitPrologue(EmitContext& ctx) { + // TODO +} + +void EmitEpilogue(EmitContext& ctx) { + // TODO +} + +void EmitEmitVertex(EmitContext& ctx, ScalarS32 stream) { + if (stream.type == Type::U32 && stream.imm_u32 == 0) { + ctx.Add("EMIT;"); + } else { + ctx.Add("EMITS {};", stream); + } +} + +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { + if (!stream.IsImmediate()) { + LOG_WARNING(Shader_GLASM, "Stream is not immediate"); + } + ctx.reg_alloc.Consume(stream); + ctx.Add("ENDPRIM;"); +} + +void EmitGetRegister(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetRegister(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetPred(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetPred(EmitContext& ctx) { + NotImplemented(); +} + +void 
EmitSetGotoVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetGotoVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetIndirectBranchVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetIndirectBranchVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetZFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetSFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetCFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetOFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetZFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetSFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetCFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetOFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {},invocation.groupid;", inst); +} + +void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {},invocation.localid;", inst); +} + +void EmitInvocationId(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {}.x,primitive_invocation.x;", inst); +} + +void EmitSampleId(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {}.x,fragment.sampleid.x;", inst); +} + +void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {}.x,fragment.helperthread.x;", inst); +} + +void EmitYDirection(EmitContext& ctx, IR::Inst& inst) { + ctx.uses_y_direction = true; + ctx.Add("MOV.F {}.x,y_direction[0].w;", inst); +} + +void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {}.x,0;", inst); +} + +void EmitUndefU8(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {}.x,0;", inst); +} + +void EmitUndefU16(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {}.x,0;", inst); +} + +void EmitUndefU32(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {}.x,0;", inst); +} + +void EmitUndefU64(EmitContext& ctx, IR::Inst& inst) { + ctx.LongAdd("MOV.S64 {}.x,0;", inst); +} + +void EmitGetZeroFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetSignFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetCarryFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetOverflowFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetSparseFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetInBoundsFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("OR.S {},{},{};", inst, a, b); +} + +void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("AND.S {},{},{};", inst, a, b); +} + +void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("XOR.S {},{},{};", inst, a, b); +} + +void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + ctx.Add("SEQ.S {},{},0;", inst, value); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp new file mode 100644 index 000000000..68fff613c --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp @@ -0,0 +1,67 @@ + +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
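[Editor's note] One convention worth keeping in mind for the logical emitters above and the select emitters below: GLASM predicates are integer masks, -1 (all bits set) for true and 0 for false. That is why EmitLogicalNot is an SEQ.S against zero and why CMP.S can branch on the condition value directly. A trivial sketch of that mapping follows; the helper name is invented for illustration.

#include <cstdint>

// Matches how RegAlloc::MakeImm (later in this diff) materializes IR::Type::U1
// immediates: true becomes 0xFFFFFFFF, false becomes 0.
constexpr uint32_t BoolToGlasmMask(bool value) {
    return value ? 0xFFFF'FFFFu : 0u;
}
static_assert(BoolToGlasmMask(true) == 0xFFFF'FFFFu);
static_assert(BoolToGlasmMask(false) == 0u);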
+ +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { + +void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, + ScalarS32 false_value) { + ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value); +} + +void EmitSelectU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, + [[maybe_unused]] ScalarS32 true_value, [[maybe_unused]] ScalarS32 false_value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitSelectU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, + [[maybe_unused]] ScalarS32 true_value, [[maybe_unused]] ScalarS32 false_value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, + ScalarS32 false_value) { + ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value); +} + +void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, Register true_value, + Register false_value) { + ctx.reg_alloc.InvalidateConditionCodes(); + const Register ret{ctx.reg_alloc.LongDefine(inst)}; + if (ret == true_value) { + ctx.Add("MOV.S.CC RC.x,{};" + "MOV.U64 {}.x(EQ.x),{};", + cond, ret, false_value); + } else if (ret == false_value) { + ctx.Add("MOV.S.CC RC.x,{};" + "MOV.U64 {}.x(NE.x),{};", + cond, ret, true_value); + } else { + ctx.Add("MOV.S.CC RC.x,{};" + "MOV.U64 {}.x,{};" + "MOV.U64 {}.x(NE.x),{};", + cond, ret, false_value, ret, true_value); + } +} + +void EmitSelectF16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, + [[maybe_unused]] Register true_value, [[maybe_unused]] Register false_value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, + ScalarS32 false_value) { + ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value); +} + +void EmitSelectF64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, + [[maybe_unused]] Register true_value, [[maybe_unused]] Register false_value) { + throw NotImplementedException("GLASM instruction"); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp new file mode 100644 index 000000000..c1498f449 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp @@ -0,0 +1,58 @@ + +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { +void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { + ctx.Add("LDS.U8 {},shared_mem[{}];", inst, offset); +} + +void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { + ctx.Add("LDS.S8 {},shared_mem[{}];", inst, offset); +} + +void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { + ctx.Add("LDS.U16 {},shared_mem[{}];", inst, offset); +} + +void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { + ctx.Add("LDS.S16 {},shared_mem[{}];", inst, offset); +} + +void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { + ctx.Add("LDS.U32 {},shared_mem[{}];", inst, offset); +} + +void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { + ctx.Add("LDS.U32X2 {},shared_mem[{}];", inst, offset); +} + +void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { + ctx.Add("LDS.U32X4 {},shared_mem[{}];", inst, offset); +} + +void EmitWriteSharedU8(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) { + ctx.Add("STS.U8 {},shared_mem[{}];", value, offset); +} + +void EmitWriteSharedU16(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) { + ctx.Add("STS.U16 {},shared_mem[{}];", value, offset); +} + +void EmitWriteSharedU32(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) { + ctx.Add("STS.U32 {},shared_mem[{}];", value, offset); +} + +void EmitWriteSharedU64(EmitContext& ctx, ScalarU32 offset, Register value) { + ctx.Add("STS.U32X2 {},shared_mem[{}];", value, offset); +} + +void EmitWriteSharedU128(EmitContext& ctx, ScalarU32 offset, Register value) { + ctx.Add("STS.U32X4 {},shared_mem[{}];", value, offset); +} +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp new file mode 100644 index 000000000..544d475b4 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp @@ -0,0 +1,150 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLASM { + +void EmitLaneId(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {}.x,{}.threadid;", inst, ctx.stage_name); +} + +void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) { + ctx.Add("TGALL.S {}.x,{};", inst, pred); +} + +void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) { + ctx.Add("TGANY.S {}.x,{};", inst, pred); +} + +void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) { + ctx.Add("TGEQ.S {}.x,{};", inst, pred); +} + +void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) { + ctx.Add("TGBALLOT {}.x,{};", inst, pred); +} + +void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.U {},{}.threadeqmask;", inst, ctx.stage_name); +} + +void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.U {},{}.threadltmask;", inst, ctx.stage_name); +} + +void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.U {},{}.threadlemask;", inst, ctx.stage_name); +} + +void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.U {},{}.threadgtmask;", inst, ctx.stage_name); +} + +void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.U {},{}.threadgemask;", inst, ctx.stage_name); +} + +static void Shuffle(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + const IR::Value& clamp, const IR::Value& segmentation_mask, + std::string_view op) { + IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; + if (in_bounds) { + in_bounds->Invalidate(); + } + std::string mask; + if (clamp.IsImmediate() && segmentation_mask.IsImmediate()) { + mask = fmt::to_string(clamp.U32() | (segmentation_mask.U32() << 8)); + } else { + mask = "RC"; + ctx.Add("BFI.U RC.x,{{5,8,0,0}},{},{};", + ScalarU32{ctx.reg_alloc.Consume(segmentation_mask)}, + ScalarU32{ctx.reg_alloc.Consume(clamp)}); + } + const Register value_ret{ctx.reg_alloc.Define(inst)}; + if (in_bounds) { + const Register bounds_ret{ctx.reg_alloc.Define(*in_bounds)}; + ctx.Add("SHF{}.U {},{},{},{};" + "MOV.U {}.x,{}.y;", + op, bounds_ret, value, index, mask, value_ret, bounds_ret); + } else { + ctx.Add("SHF{}.U {},{},{},{};" + "MOV.U {}.x,{}.y;", + op, value_ret, value, index, mask, value_ret, value_ret); + } +} + +void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + const IR::Value& clamp, const IR::Value& segmentation_mask) { + Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "IDX"); +} + +void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + const IR::Value& clamp, const IR::Value& segmentation_mask) { + Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "UP"); +} + +void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + const IR::Value& clamp, const IR::Value& segmentation_mask) { + Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "DOWN"); +} + +void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + const IR::Value& clamp, const IR::Value& segmentation_mask) { + Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "XOR"); +} + +void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 op_b, + ScalarU32 
swizzle) { + const auto ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("AND.U RC.z,{}.threadid,3;" + "SHL.U RC.z,RC.z,1;" + "SHR.U RC.z,{},RC.z;" + "AND.U RC.z,RC.z,3;" + "MUL.F RC.x,{},FSWZA[RC.z];" + "MUL.F RC.y,{},FSWZB[RC.z];" + "ADD.F {}.x,RC.x,RC.y;", + ctx.stage_name, swizzle, op_a, op_b, ret); +} + +void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { + if (ctx.profile.support_derivative_control) { + ctx.Add("DDX.FINE {}.x,{};", inst, p); + } else { + LOG_WARNING(Shader_GLASM, "Fine derivatives not supported by device"); + ctx.Add("DDX {}.x,{};", inst, p); + } +} + +void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { + if (ctx.profile.support_derivative_control) { + ctx.Add("DDY.FINE {}.x,{};", inst, p); + } else { + LOG_WARNING(Shader_GLASM, "Fine derivatives not supported by device"); + ctx.Add("DDY {}.x,{};", inst, p); + } +} + +void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { + if (ctx.profile.support_derivative_control) { + ctx.Add("DDX.COARSE {}.x,{};", inst, p); + } else { + LOG_WARNING(Shader_GLASM, "Coarse derivatives not supported by device"); + ctx.Add("DDX {}.x,{};", inst, p); + } +} + +void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { + if (ctx.profile.support_derivative_control) { + ctx.Add("DDY.COARSE {}.x,{};", inst, p); + } else { + LOG_WARNING(Shader_GLASM, "Coarse derivatives not supported by device"); + ctx.Add("DDY {}.x,{};", inst, p); + } +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.cpp b/src/shader_recompiler/backend/glasm/reg_alloc.cpp new file mode 100644 index 000000000..4c046db6e --- /dev/null +++ b/src/shader_recompiler/backend/glasm/reg_alloc.cpp @@ -0,0 +1,186 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string> + +#include <fmt/format.h> + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/reg_alloc.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { + +Register RegAlloc::Define(IR::Inst& inst) { + return Define(inst, false); +} + +Register RegAlloc::LongDefine(IR::Inst& inst) { + return Define(inst, true); +} + +Value RegAlloc::Peek(const IR::Value& value) { + if (value.IsImmediate()) { + return MakeImm(value); + } else { + return PeekInst(*value.Inst()); + } +} + +Value RegAlloc::Consume(const IR::Value& value) { + if (value.IsImmediate()) { + return MakeImm(value); + } else { + return ConsumeInst(*value.Inst()); + } +} + +void RegAlloc::Unref(IR::Inst& inst) { + IR::Inst& value_inst{AliasInst(inst)}; + value_inst.DestructiveRemoveUsage(); + if (!value_inst.HasUses()) { + Free(value_inst.Definition<Id>()); + } +} + +Register RegAlloc::AllocReg() { + Register ret; + ret.type = Type::Register; + ret.id = Alloc(false); + return ret; +} + +Register RegAlloc::AllocLongReg() { + Register ret; + ret.type = Type::Register; + ret.id = Alloc(true); + return ret; +} + +void RegAlloc::FreeReg(Register reg) { + Free(reg.id); +} + +Value RegAlloc::MakeImm(const IR::Value& value) { + Value ret; + switch (value.Type()) { + case IR::Type::Void: + ret.type = Type::Void; + break; + case IR::Type::U1: + ret.type = Type::U32; + ret.imm_u32 = value.U1() ? 
0xffffffff : 0; + break; + case IR::Type::U32: + ret.type = Type::U32; + ret.imm_u32 = value.U32(); + break; + case IR::Type::F32: + ret.type = Type::U32; + ret.imm_u32 = Common::BitCast<u32>(value.F32()); + break; + case IR::Type::U64: + ret.type = Type::U64; + ret.imm_u64 = value.U64(); + break; + case IR::Type::F64: + ret.type = Type::U64; + ret.imm_u64 = Common::BitCast<u64>(value.F64()); + break; + default: + throw NotImplementedException("Immediate type {}", value.Type()); + } + return ret; +} + +Register RegAlloc::Define(IR::Inst& inst, bool is_long) { + if (inst.HasUses()) { + inst.SetDefinition<Id>(Alloc(is_long)); + } else { + Id id{}; + id.is_long.Assign(is_long ? 1 : 0); + id.is_null.Assign(1); + inst.SetDefinition<Id>(id); + } + return Register{PeekInst(inst)}; +} + +Value RegAlloc::PeekInst(IR::Inst& inst) { + Value ret; + ret.type = Type::Register; + ret.id = inst.Definition<Id>(); + return ret; +} + +Value RegAlloc::ConsumeInst(IR::Inst& inst) { + Unref(inst); + return PeekInst(inst); +} + +Id RegAlloc::Alloc(bool is_long) { + size_t& num_regs{is_long ? num_used_long_registers : num_used_registers}; + std::bitset<NUM_REGS>& use{is_long ? long_register_use : register_use}; + if (num_used_registers + num_used_long_registers < NUM_REGS) { + for (size_t reg = 0; reg < NUM_REGS; ++reg) { + if (use[reg]) { + continue; + } + num_regs = std::max(num_regs, reg + 1); + use[reg] = true; + Id ret{}; + ret.is_valid.Assign(1); + ret.is_long.Assign(is_long ? 1 : 0); + ret.is_spill.Assign(0); + ret.is_condition_code.Assign(0); + ret.is_null.Assign(0); + ret.index.Assign(static_cast<u32>(reg)); + return ret; + } + } + throw NotImplementedException("Register spilling"); +} + +void RegAlloc::Free(Id id) { + if (id.is_valid == 0) { + throw LogicError("Freeing invalid register"); + } + if (id.is_spill != 0) { + throw NotImplementedException("Free spill"); + } + if (id.is_long != 0) { + long_register_use[id.index] = false; + } else { + register_use[id.index] = false; + } +} + +/*static*/ bool RegAlloc::IsAliased(const IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::Identity: + case IR::Opcode::BitCastU16F16: + case IR::Opcode::BitCastU32F32: + case IR::Opcode::BitCastU64F64: + case IR::Opcode::BitCastF16U16: + case IR::Opcode::BitCastF32U32: + case IR::Opcode::BitCastF64U64: + return true; + default: + return false; + } +} + +/*static*/ IR::Inst& RegAlloc::AliasInst(IR::Inst& inst) { + IR::Inst* it{&inst}; + while (IsAliased(*it)) { + const IR::Value arg{it->Arg(0)}; + if (arg.IsImmediate()) { + break; + } + it = arg.InstRecursive(); + } + return *it; +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.h b/src/shader_recompiler/backend/glasm/reg_alloc.h new file mode 100644 index 000000000..82aec66c6 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/reg_alloc.h @@ -0,0 +1,303 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
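[Editor's note] As a usage sketch only (hypothetical driver code, not part of the change): the allocator implemented just above, and declared in the header that follows, hands out the lowest free index and tracks a high-water mark rather than a live count, so freed registers are reused immediately.

#include "shader_recompiler/backend/glasm/reg_alloc.h"

// Hypothetical illustration of RegAlloc's allocation behaviour.
void RegAllocSketch() {
    using namespace Shader::Backend::GLASM;
    RegAlloc reg_alloc;
    const Register r0{reg_alloc.AllocReg()}; // formats as "R0"
    const Register r1{reg_alloc.AllocReg()}; // formats as "R1"
    reg_alloc.FreeReg(r0);
    const Register r2{reg_alloc.AllocReg()}; // lowest free slot again: "R0"
    // NumUsedRegisters() is now 2: the high-water mark, not the number of live registers.
    reg_alloc.FreeReg(r1);
    reg_alloc.FreeReg(r2);
}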
+ +#pragma once + +#include <bitset> + +#include <fmt/format.h> + +#include "common/bit_cast.h" +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" + +namespace Shader::IR { +class Inst; +class Value; +} // namespace Shader::IR + +namespace Shader::Backend::GLASM { + +class EmitContext; + +enum class Type : u32 { + Void, + Register, + U32, + U64, +}; + +struct Id { + union { + u32 raw; + BitField<0, 1, u32> is_valid; + BitField<1, 1, u32> is_long; + BitField<2, 1, u32> is_spill; + BitField<3, 1, u32> is_condition_code; + BitField<4, 1, u32> is_null; + BitField<5, 27, u32> index; + }; + + bool operator==(Id rhs) const noexcept { + return raw == rhs.raw; + } + bool operator!=(Id rhs) const noexcept { + return !operator==(rhs); + } +}; +static_assert(sizeof(Id) == sizeof(u32)); + +struct Value { + Type type; + union { + Id id; + u32 imm_u32; + u64 imm_u64; + }; + + bool operator==(const Value& rhs) const noexcept { + if (type != rhs.type) { + return false; + } + switch (type) { + case Type::Void: + return true; + case Type::Register: + return id == rhs.id; + case Type::U32: + return imm_u32 == rhs.imm_u32; + case Type::U64: + return imm_u64 == rhs.imm_u64; + } + return false; + } + bool operator!=(const Value& rhs) const noexcept { + return !operator==(rhs); + } +}; +struct Register : Value {}; +struct ScalarRegister : Value {}; +struct ScalarU32 : Value {}; +struct ScalarS32 : Value {}; +struct ScalarF32 : Value {}; +struct ScalarF64 : Value {}; + +class RegAlloc { +public: + RegAlloc() = default; + + Register Define(IR::Inst& inst); + + Register LongDefine(IR::Inst& inst); + + [[nodiscard]] Value Peek(const IR::Value& value); + + Value Consume(const IR::Value& value); + + void Unref(IR::Inst& inst); + + [[nodiscard]] Register AllocReg(); + + [[nodiscard]] Register AllocLongReg(); + + void FreeReg(Register reg); + + void InvalidateConditionCodes() { + // This does nothing for now + } + + [[nodiscard]] size_t NumUsedRegisters() const noexcept { + return num_used_registers; + } + + [[nodiscard]] size_t NumUsedLongRegisters() const noexcept { + return num_used_long_registers; + } + + [[nodiscard]] bool IsEmpty() const noexcept { + return register_use.none() && long_register_use.none(); + } + + /// Returns true if the instruction is expected to be aliased to another + static bool IsAliased(const IR::Inst& inst); + + /// Returns the underlying value out of an alias sequence + static IR::Inst& AliasInst(IR::Inst& inst); + +private: + static constexpr size_t NUM_REGS = 4096; + static constexpr size_t NUM_ELEMENTS = 4; + + Value MakeImm(const IR::Value& value); + + Register Define(IR::Inst& inst, bool is_long); + + Value PeekInst(IR::Inst& inst); + + Value ConsumeInst(IR::Inst& inst); + + Id Alloc(bool is_long); + + void Free(Id id); + + size_t num_used_registers{}; + size_t num_used_long_registers{}; + std::bitset<NUM_REGS> register_use{}; + std::bitset<NUM_REGS> long_register_use{}; +}; + +template <bool scalar, typename FormatContext> +auto FormatTo(FormatContext& ctx, Id id) { + if (id.is_condition_code != 0) { + throw NotImplementedException("Condition code emission"); + } + if (id.is_spill != 0) { + throw NotImplementedException("Spill emission"); + } + if constexpr (scalar) { + if (id.is_null != 0) { + return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? 
"DC.x" : "RC.x"); + } + if (id.is_long != 0) { + return fmt::format_to(ctx.out(), "D{}.x", id.index.Value()); + } else { + return fmt::format_to(ctx.out(), "R{}.x", id.index.Value()); + } + } else { + if (id.is_null != 0) { + return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC" : "RC"); + } + if (id.is_long != 0) { + return fmt::format_to(ctx.out(), "D{}", id.index.Value()); + } else { + return fmt::format_to(ctx.out(), "R{}", id.index.Value()); + } + } +} + +} // namespace Shader::Backend::GLASM + +template <> +struct fmt::formatter<Shader::Backend::GLASM::Id> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(Shader::Backend::GLASM::Id id, FormatContext& ctx) { + return Shader::Backend::GLASM::FormatTo<true>(ctx, id); + } +}; + +template <> +struct fmt::formatter<Shader::Backend::GLASM::Register> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::Backend::GLASM::Register& value, FormatContext& ctx) { + if (value.type != Shader::Backend::GLASM::Type::Register) { + throw Shader::InvalidArgument("Register value type is not register"); + } + return Shader::Backend::GLASM::FormatTo<false>(ctx, value.id); + } +}; + +template <> +struct fmt::formatter<Shader::Backend::GLASM::ScalarRegister> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::Backend::GLASM::ScalarRegister& value, FormatContext& ctx) { + if (value.type != Shader::Backend::GLASM::Type::Register) { + throw Shader::InvalidArgument("Register value type is not register"); + } + return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); + } +}; + +template <> +struct fmt::formatter<Shader::Backend::GLASM::ScalarU32> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::Backend::GLASM::ScalarU32& value, FormatContext& ctx) { + switch (value.type) { + case Shader::Backend::GLASM::Type::Void: + break; + case Shader::Backend::GLASM::Type::Register: + return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); + case Shader::Backend::GLASM::Type::U32: + return fmt::format_to(ctx.out(), "{}", value.imm_u32); + case Shader::Backend::GLASM::Type::U64: + break; + } + throw Shader::InvalidArgument("Invalid value type {}", value.type); + } +}; + +template <> +struct fmt::formatter<Shader::Backend::GLASM::ScalarS32> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::Backend::GLASM::ScalarS32& value, FormatContext& ctx) { + switch (value.type) { + case Shader::Backend::GLASM::Type::Void: + break; + case Shader::Backend::GLASM::Type::Register: + return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); + case Shader::Backend::GLASM::Type::U32: + return fmt::format_to(ctx.out(), "{}", static_cast<s32>(value.imm_u32)); + case Shader::Backend::GLASM::Type::U64: + break; + } + throw Shader::InvalidArgument("Invalid value type {}", value.type); + } +}; + +template <> +struct fmt::formatter<Shader::Backend::GLASM::ScalarF32> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::Backend::GLASM::ScalarF32& value, FormatContext& ctx) { + switch (value.type) { + case Shader::Backend::GLASM::Type::Void: + break; + 
case Shader::Backend::GLASM::Type::Register: + return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); + case Shader::Backend::GLASM::Type::U32: + return fmt::format_to(ctx.out(), "{}", Common::BitCast<f32>(value.imm_u32)); + case Shader::Backend::GLASM::Type::U64: + break; + } + throw Shader::InvalidArgument("Invalid value type {}", value.type); + } +}; + +template <> +struct fmt::formatter<Shader::Backend::GLASM::ScalarF64> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::Backend::GLASM::ScalarF64& value, FormatContext& ctx) { + switch (value.type) { + case Shader::Backend::GLASM::Type::Void: + break; + case Shader::Backend::GLASM::Type::Register: + return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); + case Shader::Backend::GLASM::Type::U32: + break; + case Shader::Backend::GLASM::Type::U64: + return fmt::format_to(ctx.out(), "{}", Common::BitCast<f64>(value.imm_u64)); + } + throw Shader::InvalidArgument("Invalid value type {}", value.type); + } +}; diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp new file mode 100644 index 000000000..4e6f2c0fe --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -0,0 +1,715 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" + +namespace Shader::Backend::GLSL { +namespace { +u32 CbufIndex(size_t offset) { + return (offset / 4) % 4; +} + +char Swizzle(size_t offset) { + return "xyzw"[CbufIndex(offset)]; +} + +std::string_view InterpDecorator(Interpolation interp) { + switch (interp) { + case Interpolation::Smooth: + return ""; + case Interpolation::Flat: + return "flat "; + case Interpolation::NoPerspective: + return "noperspective "; + } + throw InvalidArgument("Invalid interpolation {}", interp); +} + +std::string_view InputArrayDecorator(Stage stage) { + switch (stage) { + case Stage::Geometry: + case Stage::TessellationControl: + case Stage::TessellationEval: + return "[]"; + default: + return ""; + } +} + +bool StoresPerVertexAttributes(Stage stage) { + switch (stage) { + case Stage::VertexA: + case Stage::VertexB: + case Stage::Geometry: + case Stage::TessellationEval: + return true; + default: + return false; + } +} + +std::string OutputDecorator(Stage stage, u32 size) { + switch (stage) { + case Stage::TessellationControl: + return fmt::format("[{}]", size); + default: + return ""; + } +} + +std::string_view SamplerType(TextureType type, bool is_depth) { + if (is_depth) { + switch (type) { + case TextureType::Color1D: + return "sampler1DShadow"; + case TextureType::ColorArray1D: + return "sampler1DArrayShadow"; + case TextureType::Color2D: + return "sampler2DShadow"; + case TextureType::ColorArray2D: + return "sampler2DArrayShadow"; + case TextureType::ColorCube: + return "samplerCubeShadow"; + case TextureType::ColorArrayCube: + return "samplerCubeArrayShadow"; + default: + throw NotImplementedException("Texture type: {}", type); + } + } + switch (type) { + case TextureType::Color1D: + return "sampler1D"; + case TextureType::ColorArray1D: + return "sampler1DArray"; + case TextureType::Color2D: + return "sampler2D"; + case 
TextureType::ColorArray2D: + return "sampler2DArray"; + case TextureType::Color3D: + return "sampler3D"; + case TextureType::ColorCube: + return "samplerCube"; + case TextureType::ColorArrayCube: + return "samplerCubeArray"; + case TextureType::Buffer: + return "samplerBuffer"; + default: + throw NotImplementedException("Texture type: {}", type); + } +} + +std::string_view ImageType(TextureType type) { + switch (type) { + case TextureType::Color1D: + return "uimage1D"; + case TextureType::ColorArray1D: + return "uimage1DArray"; + case TextureType::Color2D: + return "uimage2D"; + case TextureType::ColorArray2D: + return "uimage2DArray"; + case TextureType::Color3D: + return "uimage3D"; + case TextureType::ColorCube: + return "uimageCube"; + case TextureType::ColorArrayCube: + return "uimageCubeArray"; + case TextureType::Buffer: + return "uimageBuffer"; + default: + throw NotImplementedException("Image type: {}", type); + } +} + +std::string_view ImageFormatString(ImageFormat format) { + switch (format) { + case ImageFormat::Typeless: + return ""; + case ImageFormat::R8_UINT: + return ",r8ui"; + case ImageFormat::R8_SINT: + return ",r8i"; + case ImageFormat::R16_UINT: + return ",r16ui"; + case ImageFormat::R16_SINT: + return ",r16i"; + case ImageFormat::R32_UINT: + return ",r32ui"; + case ImageFormat::R32G32_UINT: + return ",rg32ui"; + case ImageFormat::R32G32B32A32_UINT: + return ",rgba32ui"; + default: + throw NotImplementedException("Image format: {}", format); + } +} + +std::string_view ImageAccessQualifier(bool is_written, bool is_read) { + if (is_written && !is_read) { + return "writeonly "; + } + if (is_read && !is_written) { + return "readonly "; + } + return ""; +} + +std::string_view GetTessMode(TessPrimitive primitive) { + switch (primitive) { + case TessPrimitive::Triangles: + return "triangles"; + case TessPrimitive::Quads: + return "quads"; + case TessPrimitive::Isolines: + return "isolines"; + } + throw InvalidArgument("Invalid tessellation primitive {}", primitive); +} + +std::string_view GetTessSpacing(TessSpacing spacing) { + switch (spacing) { + case TessSpacing::Equal: + return "equal_spacing"; + case TessSpacing::FractionalOdd: + return "fractional_odd_spacing"; + case TessSpacing::FractionalEven: + return "fractional_even_spacing"; + } + throw InvalidArgument("Invalid tessellation spacing {}", spacing); +} + +std::string_view InputPrimitive(InputTopology topology) { + switch (topology) { + case InputTopology::Points: + return "points"; + case InputTopology::Lines: + return "lines"; + case InputTopology::LinesAdjacency: + return "lines_adjacency"; + case InputTopology::Triangles: + return "triangles"; + case InputTopology::TrianglesAdjacency: + return "triangles_adjacency"; + } + throw InvalidArgument("Invalid input topology {}", topology); +} + +std::string_view OutputPrimitive(OutputTopology topology) { + switch (topology) { + case OutputTopology::PointList: + return "points"; + case OutputTopology::LineStrip: + return "line_strip"; + case OutputTopology::TriangleStrip: + return "triangle_strip"; + } + throw InvalidArgument("Invalid output topology {}", topology); +} + +void SetupLegacyOutPerVertex(EmitContext& ctx, std::string& header) { + if (!ctx.info.stores.Legacy()) { + return; + } + if (ctx.info.stores.FixedFunctionTexture()) { + header += "vec4 gl_TexCoord[8];"; + } + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { + header += "vec4 gl_FrontColor;"; + } + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorFrontSpecularR)) { + header += 
"vec4 gl_FrontSecondaryColor;"; + } + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorBackDiffuseR)) { + header += "vec4 gl_BackColor;"; + } + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorBackSpecularR)) { + header += "vec4 gl_BackSecondaryColor;"; + } +} + +void SetupOutPerVertex(EmitContext& ctx, std::string& header) { + if (!StoresPerVertexAttributes(ctx.stage)) { + return; + } + if (ctx.uses_geometry_passthrough) { + return; + } + header += "out gl_PerVertex{vec4 gl_Position;"; + if (ctx.info.stores[IR::Attribute::PointSize]) { + header += "float gl_PointSize;"; + } + if (ctx.info.stores.ClipDistances()) { + header += "float gl_ClipDistance[];"; + } + if (ctx.info.stores[IR::Attribute::ViewportIndex] && + ctx.profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) { + header += "int gl_ViewportIndex;"; + } + SetupLegacyOutPerVertex(ctx, header); + header += "};"; + if (ctx.info.stores[IR::Attribute::ViewportIndex] && ctx.stage == Stage::Geometry) { + header += "out int gl_ViewportIndex;"; + } +} + +void SetupInPerVertex(EmitContext& ctx, std::string& header) { + // Currently only required for TessellationControl to adhere to + // ARB_separate_shader_objects requirements + if (ctx.stage != Stage::TessellationControl) { + return; + } + const bool loads_position{ctx.info.loads.AnyComponent(IR::Attribute::PositionX)}; + const bool loads_point_size{ctx.info.loads[IR::Attribute::PointSize]}; + const bool loads_clip_distance{ctx.info.loads.ClipDistances()}; + const bool loads_per_vertex{loads_position || loads_point_size || loads_clip_distance}; + if (!loads_per_vertex) { + return; + } + header += "in gl_PerVertex{"; + if (loads_position) { + header += "vec4 gl_Position;"; + } + if (loads_point_size) { + header += "float gl_PointSize;"; + } + if (loads_clip_distance) { + header += "float gl_ClipDistance[];"; + } + header += "}gl_in[gl_MaxPatchVertices];"; +} + +void SetupLegacyInPerFragment(EmitContext& ctx, std::string& header) { + if (!ctx.info.loads.Legacy()) { + return; + } + header += "in gl_PerFragment{"; + if (ctx.info.loads.FixedFunctionTexture()) { + header += "vec4 gl_TexCoord[8];"; + } + if (ctx.info.loads.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { + header += "vec4 gl_Color;"; + } + header += "};"; +} + +} // Anonymous namespace + +EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, + const RuntimeInfo& runtime_info_) + : info{program.info}, profile{profile_}, runtime_info{runtime_info_}, stage{program.stage}, + uses_geometry_passthrough{program.is_geometry_passthrough && + profile.support_geometry_shader_passthrough} { + if (profile.need_fastmath_off) { + header += "#pragma optionNV(fastmath off)\n"; + } + SetupExtensions(); + switch (program.stage) { + case Stage::VertexA: + case Stage::VertexB: + stage_name = "vs"; + break; + case Stage::TessellationControl: + stage_name = "tcs"; + header += fmt::format("layout(vertices={})out;", program.invocations); + break; + case Stage::TessellationEval: + stage_name = "tes"; + header += fmt::format("layout({},{},{})in;", GetTessMode(runtime_info.tess_primitive), + GetTessSpacing(runtime_info.tess_spacing), + runtime_info.tess_clockwise ? 
"cw" : "ccw"); + break; + case Stage::Geometry: + stage_name = "gs"; + header += fmt::format("layout({})in;", InputPrimitive(runtime_info.input_topology)); + if (uses_geometry_passthrough) { + header += "layout(passthrough)in gl_PerVertex{vec4 gl_Position;};"; + break; + } else if (program.is_geometry_passthrough && + !profile.support_geometry_shader_passthrough) { + LOG_WARNING(Shader_GLSL, "Passthrough geometry program used but not supported"); + } + header += fmt::format( + "layout({},max_vertices={})out;in gl_PerVertex{{vec4 gl_Position;}}gl_in[];", + OutputPrimitive(program.output_topology), program.output_vertices); + break; + case Stage::Fragment: + stage_name = "fs"; + position_name = "gl_FragCoord"; + if (runtime_info.force_early_z) { + header += "layout(early_fragment_tests)in;"; + } + if (info.uses_sample_id) { + header += "in int gl_SampleID;"; + } + if (info.stores_sample_mask) { + header += "out int gl_SampleMask[];"; + } + break; + case Stage::Compute: + stage_name = "cs"; + const u32 local_x{std::max(program.workgroup_size[0], 1u)}; + const u32 local_y{std::max(program.workgroup_size[1], 1u)}; + const u32 local_z{std::max(program.workgroup_size[2], 1u)}; + header += fmt::format("layout(local_size_x={},local_size_y={},local_size_z={}) in;", + local_x, local_y, local_z); + break; + } + SetupOutPerVertex(*this, header); + SetupInPerVertex(*this, header); + SetupLegacyInPerFragment(*this, header); + + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.loads.Generic(index) || !runtime_info.previous_stage_stores.Generic(index)) { + continue; + } + const auto qualifier{uses_geometry_passthrough ? "passthrough" + : fmt::format("location={}", index)}; + header += fmt::format("layout({}){}in vec4 in_attr{}{};", qualifier, + InterpDecorator(info.interpolation[index]), index, + InputArrayDecorator(stage)); + } + for (size_t index = 0; index < info.uses_patches.size(); ++index) { + if (!info.uses_patches[index]) { + continue; + } + const auto qualifier{stage == Stage::TessellationControl ? 
"out" : "in"}; + header += fmt::format("layout(location={})patch {} vec4 patch{};", index, qualifier, index); + } + if (stage == Stage::Fragment) { + for (size_t index = 0; index < info.stores_frag_color.size(); ++index) { + if (!info.stores_frag_color[index] && !profile.need_declared_frag_colors) { + continue; + } + header += fmt::format("layout(location={})out vec4 frag_color{};", index, index); + } + } + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.stores.Generic(index)) { + DefineGenericOutput(index, program.invocations); + } + } + DefineConstantBuffers(bindings); + DefineStorageBuffers(bindings); + SetupImages(bindings); + SetupTextures(bindings); + DefineHelperFunctions(); + DefineConstants(); +} + +void EmitContext::SetupExtensions() { + header += "#extension GL_ARB_separate_shader_objects : enable\n"; + if (info.uses_shadow_lod && profile.support_gl_texture_shadow_lod) { + header += "#extension GL_EXT_texture_shadow_lod : enable\n"; + } + if (info.uses_int64 && profile.support_int64) { + header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; + } + if (info.uses_int64_bit_atomics) { + header += "#extension GL_NV_shader_atomic_int64 : enable\n"; + } + if (info.uses_atomic_f32_add) { + header += "#extension GL_NV_shader_atomic_float : enable\n"; + } + if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { + header += "#extension GL_NV_shader_atomic_fp16_vector : enable\n"; + } + if (info.uses_fp16) { + if (profile.support_gl_nv_gpu_shader_5) { + header += "#extension GL_NV_gpu_shader5 : enable\n"; + } + if (profile.support_gl_amd_gpu_shader_half_float) { + header += "#extension GL_AMD_gpu_shader_half_float : enable\n"; + } + } + if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote || + info.uses_subgroup_shuffles || info.uses_fswzadd) { + header += "#extension GL_ARB_shader_ballot : enable\n" + "#extension GL_ARB_shader_group_vote : enable\n"; + if (!info.uses_int64 && profile.support_int64) { + header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; + } + if (profile.support_gl_warp_intrinsics) { + header += "#extension GL_NV_shader_thread_shuffle : enable\n"; + } + } + if ((info.stores[IR::Attribute::ViewportIndex] || info.stores[IR::Attribute::Layer]) && + profile.support_viewport_index_layer_non_geometry && stage != Stage::Geometry) { + header += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; + } + if (info.uses_sparse_residency && profile.support_gl_sparse_textures) { + header += "#extension GL_ARB_sparse_texture2 : enable\n"; + } + if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { + header += "#extension GL_NV_viewport_array2 : enable\n"; + } + if (info.uses_typeless_image_reads) { + header += "#extension GL_EXT_shader_image_load_formatted : enable\n"; + } + if (info.uses_derivatives && profile.support_gl_derivative_control) { + header += "#extension GL_ARB_derivative_control : enable\n"; + } + if (uses_geometry_passthrough) { + header += "#extension GL_NV_geometry_shader_passthrough : enable\n"; + } +} + +void EmitContext::DefineConstantBuffers(Bindings& bindings) { + if (info.constant_buffer_descriptors.empty()) { + return; + } + for (const auto& desc : info.constant_buffer_descriptors) { + header += fmt::format( + "layout(std140,binding={}) uniform {}_cbuf_{}{{vec4 {}_cbuf{}[{}];}};", + bindings.uniform_buffer, stage_name, desc.index, stage_name, desc.index, 4 * 1024); + bindings.uniform_buffer += desc.count; + } +} + +void 
EmitContext::DefineStorageBuffers(Bindings& bindings) { + if (info.storage_buffers_descriptors.empty()) { + return; + } + u32 index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + header += fmt::format("layout(std430,binding={}) buffer {}_ssbo_{}{{uint {}_ssbo{}[];}};", + bindings.storage_buffer, stage_name, bindings.storage_buffer, + stage_name, index); + bindings.storage_buffer += desc.count; + index += desc.count; + } +} + +void EmitContext::DefineGenericOutput(size_t index, u32 invocations) { + static constexpr std::string_view swizzle{"xyzw"}; + const size_t base_index{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4}; + u32 element{0}; + while (element < 4) { + std::string definition{fmt::format("layout(location={}", index)}; + const u32 remainder{4 - element}; + const TransformFeedbackVarying* xfb_varying{}; + if (!runtime_info.xfb_varyings.empty()) { + xfb_varying = &runtime_info.xfb_varyings[base_index + element]; + xfb_varying = xfb_varying && xfb_varying->components > 0 ? xfb_varying : nullptr; + } + const u32 num_components{xfb_varying ? xfb_varying->components : remainder}; + if (element > 0) { + definition += fmt::format(",component={}", element); + } + if (xfb_varying) { + definition += + fmt::format(",xfb_buffer={},xfb_stride={},xfb_offset={}", xfb_varying->buffer, + xfb_varying->stride, xfb_varying->offset); + } + std::string name{fmt::format("out_attr{}", index)}; + if (num_components < 4 || element > 0) { + name += fmt::format("_{}", swizzle.substr(element, num_components)); + } + const auto type{num_components == 1 ? "float" : fmt::format("vec{}", num_components)}; + definition += fmt::format(")out {} {}{};", type, name, OutputDecorator(stage, invocations)); + header += definition; + + const GenericElementInfo element_info{ + .name = name, + .first_element = element, + .num_components = num_components, + }; + std::fill_n(output_generics[index].begin() + element, num_components, element_info); + element += num_components; + } +} + +void EmitContext::DefineHelperFunctions() { + header += "\n#define ftoi floatBitsToInt\n#define ftou floatBitsToUint\n" + "#define itof intBitsToFloat\n#define utof uintBitsToFloat\n"; + if (info.uses_global_increment || info.uses_shared_increment) { + header += "uint CasIncrement(uint op_a,uint op_b){return op_a>=op_b?0u:(op_a+1u);}"; + } + if (info.uses_global_decrement || info.uses_shared_decrement) { + header += "uint CasDecrement(uint op_a,uint op_b){" + "return op_a==0||op_a>op_b?op_b:(op_a-1u);}"; + } + if (info.uses_atomic_f32_add) { + header += "uint CasFloatAdd(uint op_a,float op_b){" + "return ftou(utof(op_a)+op_b);}"; + } + if (info.uses_atomic_f32x2_add) { + header += "uint CasFloatAdd32x2(uint op_a,vec2 op_b){" + "return packHalf2x16(unpackHalf2x16(op_a)+op_b);}"; + } + if (info.uses_atomic_f32x2_min) { + header += "uint CasFloatMin32x2(uint op_a,vec2 op_b){return " + "packHalf2x16(min(unpackHalf2x16(op_a),op_b));}"; + } + if (info.uses_atomic_f32x2_max) { + header += "uint CasFloatMax32x2(uint op_a,vec2 op_b){return " + "packHalf2x16(max(unpackHalf2x16(op_a),op_b));}"; + } + if (info.uses_atomic_f16x2_add) { + header += "uint CasFloatAdd16x2(uint op_a,f16vec2 op_b){return " + "packFloat2x16(unpackFloat2x16(op_a)+op_b);}"; + } + if (info.uses_atomic_f16x2_min) { + header += "uint CasFloatMin16x2(uint op_a,f16vec2 op_b){return " + "packFloat2x16(min(unpackFloat2x16(op_a),op_b));}"; + } + if (info.uses_atomic_f16x2_max) { + header += "uint CasFloatMax16x2(uint op_a,f16vec2 op_b){return " + 
"packFloat2x16(max(unpackFloat2x16(op_a),op_b));}"; + } + if (info.uses_atomic_s32_min) { + header += "uint CasMinS32(uint op_a,uint op_b){return uint(min(int(op_a),int(op_b)));}"; + } + if (info.uses_atomic_s32_max) { + header += "uint CasMaxS32(uint op_a,uint op_b){return uint(max(int(op_a),int(op_b)));}"; + } + if (info.uses_global_memory && profile.support_int64) { + header += DefineGlobalMemoryFunctions(); + } + if (info.loads_indexed_attributes) { + const bool is_array{stage == Stage::Geometry}; + const auto vertex_arg{is_array ? ",uint vertex" : ""}; + std::string func{ + fmt::format("float IndexedAttrLoad(int offset{}){{int base_index=offset>>2;uint " + "masked_index=uint(base_index)&3u;switch(base_index>>2){{", + vertex_arg)}; + if (info.loads.AnyComponent(IR::Attribute::PositionX)) { + const auto position_idx{is_array ? "gl_in[vertex]." : ""}; + func += fmt::format("case {}:return {}{}[masked_index];", + static_cast<u32>(IR::Attribute::PositionX) >> 2, position_idx, + position_name); + } + const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2; + for (u32 index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.loads.Generic(index)) { + continue; + } + const auto vertex_idx{is_array ? "[vertex]" : ""}; + func += fmt::format("case {}:return in_attr{}{}[masked_index];", + base_attribute_value + index, index, vertex_idx); + } + func += "default: return 0.0;}}"; + header += func; + } + if (info.stores_indexed_attributes) { + // TODO + } +} + +std::string EmitContext::DefineGlobalMemoryFunctions() { + const auto define_body{[&](std::string& func, size_t index, std::string_view return_statement) { + const auto& ssbo{info.storage_buffers_descriptors[index]}; + const u32 size_cbuf_offset{ssbo.cbuf_offset + 8}; + const auto ssbo_addr{fmt::format("ssbo_addr{}", index)}; + const auto cbuf{fmt::format("{}_cbuf{}", stage_name, ssbo.cbuf_index)}; + std::array<std::string, 2> addr_xy; + std::array<std::string, 2> size_xy; + for (size_t i = 0; i < addr_xy.size(); ++i) { + const auto addr_loc{ssbo.cbuf_offset + 4 * i}; + const auto size_loc{size_cbuf_offset + 4 * i}; + addr_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, addr_loc / 16, Swizzle(addr_loc)); + size_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, size_loc / 16, Swizzle(size_loc)); + } + const auto addr_pack{fmt::format("packUint2x32(uvec2({},{}))", addr_xy[0], addr_xy[1])}; + const auto addr_statment{fmt::format("uint64_t {}={};", ssbo_addr, addr_pack)}; + func += addr_statment; + + const auto size_vec{fmt::format("uvec2({},{})", size_xy[0], size_xy[1])}; + const auto comp_lhs{fmt::format("(addr>={})", ssbo_addr)}; + const auto comp_rhs{fmt::format("(addr<({}+uint64_t({})))", ssbo_addr, size_vec)}; + const auto comparison{fmt::format("if({}&&{}){{", comp_lhs, comp_rhs)}; + func += comparison; + + const auto ssbo_name{fmt::format("{}_ssbo{}", stage_name, index)}; + func += fmt::format(fmt::runtime(return_statement), ssbo_name, ssbo_addr); + }}; + std::string write_func{"void WriteGlobal32(uint64_t addr,uint data){"}; + std::string write_func_64{"void WriteGlobal64(uint64_t addr,uvec2 data){"}; + std::string write_func_128{"void WriteGlobal128(uint64_t addr,uvec4 data){"}; + std::string load_func{"uint LoadGlobal32(uint64_t addr){"}; + std::string load_func_64{"uvec2 LoadGlobal64(uint64_t addr){"}; + std::string load_func_128{"uvec4 LoadGlobal128(uint64_t addr){"}; + const size_t num_buffers{info.storage_buffers_descriptors.size()}; + for (size_t index = 0; index < num_buffers; ++index) { + if 
(!info.nvn_buffer_used[index]) { + continue; + } + define_body(write_func, index, "{0}[uint(addr-{1})>>2]=data;return;}}"); + define_body(write_func_64, index, + "{0}[uint(addr-{1})>>2]=data.x;{0}[uint(addr-{1}+4)>>2]=data.y;return;}}"); + define_body(write_func_128, index, + "{0}[uint(addr-{1})>>2]=data.x;{0}[uint(addr-{1}+4)>>2]=data.y;{0}[uint(" + "addr-{1}+8)>>2]=data.z;{0}[uint(addr-{1}+12)>>2]=data.w;return;}}"); + define_body(load_func, index, "return {0}[uint(addr-{1})>>2];}}"); + define_body(load_func_64, index, + "return uvec2({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2]);}}"); + define_body(load_func_128, index, + "return uvec4({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2],{0}[" + "uint(addr-{1}+8)>>2],{0}[uint(addr-{1}+12)>>2]);}}"); + } + write_func += '}'; + write_func_64 += '}'; + write_func_128 += '}'; + load_func += "return 0u;}"; + load_func_64 += "return uvec2(0);}"; + load_func_128 += "return uvec4(0);}"; + return write_func + write_func_64 + write_func_128 + load_func + load_func_64 + load_func_128; +} + +void EmitContext::SetupImages(Bindings& bindings) { + image_buffers.reserve(info.image_buffer_descriptors.size()); + for (const auto& desc : info.image_buffer_descriptors) { + image_buffers.push_back({bindings.image, desc.count}); + const auto format{ImageFormatString(desc.format)}; + const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)}; + const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; + header += fmt::format("layout(binding={}{}) uniform {}uimageBuffer img{}{};", + bindings.image, format, qualifier, bindings.image, array_decorator); + bindings.image += desc.count; + } + images.reserve(info.image_descriptors.size()); + for (const auto& desc : info.image_descriptors) { + images.push_back({bindings.image, desc.count}); + const auto format{ImageFormatString(desc.format)}; + const auto image_type{ImageType(desc.type)}; + const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)}; + const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; + header += fmt::format("layout(binding={}{})uniform {}{} img{}{};", bindings.image, format, + qualifier, image_type, bindings.image, array_decorator); + bindings.image += desc.count; + } +} + +void EmitContext::SetupTextures(Bindings& bindings) { + texture_buffers.reserve(info.texture_buffer_descriptors.size()); + for (const auto& desc : info.texture_buffer_descriptors) { + texture_buffers.push_back({bindings.texture, desc.count}); + const auto sampler_type{SamplerType(TextureType::Buffer, false)}; + const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; + header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture, + sampler_type, bindings.texture, array_decorator); + bindings.texture += desc.count; + } + textures.reserve(info.texture_descriptors.size()); + for (const auto& desc : info.texture_descriptors) { + textures.push_back({bindings.texture, desc.count}); + const auto sampler_type{SamplerType(desc.type, desc.is_depth)}; + const auto array_decorator{desc.count > 1 ? 
fmt::format("[{}]", desc.count) : ""}; + header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture, + sampler_type, bindings.texture, array_decorator); + bindings.texture += desc.count; + } +} + +void EmitContext::DefineConstants() { + if (info.uses_fswzadd) { + header += "const float FSWZ_A[]=float[4](-1.f,1.f,-1.f,0.f);" + "const float FSWZ_B[]=float[4](-1.f,-1.f,1.f,-1.f);"; + } +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h new file mode 100644 index 000000000..d9b639d29 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -0,0 +1,174 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <string> +#include <utility> +#include <vector> + +#include <fmt/format.h> + +#include "shader_recompiler/backend/glsl/var_alloc.h" +#include "shader_recompiler/stage.h" + +namespace Shader { +struct Info; +struct Profile; +struct RuntimeInfo; +} // namespace Shader + +namespace Shader::Backend { +struct Bindings; +} + +namespace Shader::IR { +class Inst; +struct Program; +} // namespace Shader::IR + +namespace Shader::Backend::GLSL { + +struct GenericElementInfo { + std::string name; + u32 first_element{}; + u32 num_components{}; +}; + +struct TextureImageDefinition { + u32 binding; + u32 count; +}; + +class EmitContext { +public: + explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, + const RuntimeInfo& runtime_info_); + + template <GlslVarType type, typename... Args> + void Add(const char* format_str, IR::Inst& inst, Args&&... args) { + const auto var_def{var_alloc.AddDefine(inst, type)}; + if (var_def.empty()) { + // skip assigment. + code += fmt::format(fmt::runtime(format_str + 3), std::forward<Args>(args)...); + } else { + code += fmt::format(fmt::runtime(format_str), var_def, std::forward<Args>(args)...); + } + // TODO: Remove this + code += '\n'; + } + + template <typename... Args> + void AddU1(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::U1>(format_str, inst, args...); + } + + template <typename... Args> + void AddF16x2(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::F16x2>(format_str, inst, args...); + } + + template <typename... Args> + void AddU32(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::U32>(format_str, inst, args...); + } + + template <typename... Args> + void AddF32(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::F32>(format_str, inst, args...); + } + + template <typename... Args> + void AddU64(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::U64>(format_str, inst, args...); + } + + template <typename... Args> + void AddF64(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::F64>(format_str, inst, args...); + } + + template <typename... Args> + void AddU32x2(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::U32x2>(format_str, inst, args...); + } + + template <typename... Args> + void AddF32x2(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::F32x2>(format_str, inst, args...); + } + + template <typename... Args> + void AddU32x3(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::U32x3>(format_str, inst, args...); + } + + template <typename... 
Args> + void AddF32x3(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::F32x3>(format_str, inst, args...); + } + + template <typename... Args> + void AddU32x4(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::U32x4>(format_str, inst, args...); + } + + template <typename... Args> + void AddF32x4(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::F32x4>(format_str, inst, args...); + } + + template <typename... Args> + void AddPrecF32(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::PrecF32>(format_str, inst, args...); + } + + template <typename... Args> + void AddPrecF64(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::PrecF64>(format_str, inst, args...); + } + + template <typename... Args> + void Add(const char* format_str, Args&&... args) { + code += fmt::format(fmt::runtime(format_str), std::forward<Args>(args)...); + // TODO: Remove this + code += '\n'; + } + + std::string header; + std::string code; + VarAlloc var_alloc; + const Info& info; + const Profile& profile; + const RuntimeInfo& runtime_info; + + Stage stage{}; + std::string_view stage_name = "invalid"; + std::string_view position_name = "gl_Position"; + + std::vector<TextureImageDefinition> texture_buffers; + std::vector<TextureImageDefinition> image_buffers; + std::vector<TextureImageDefinition> textures; + std::vector<TextureImageDefinition> images; + std::array<std::array<GenericElementInfo, 4>, 32> output_generics{}; + + u32 num_safety_loop_vars{}; + + bool uses_y_direction{}; + bool uses_cc_carry{}; + bool uses_geometry_passthrough{}; + +private: + void SetupExtensions(); + void DefineConstantBuffers(Bindings& bindings); + void DefineStorageBuffers(Bindings& bindings); + void DefineGenericOutput(size_t index, u32 invocations); + void DefineHelperFunctions(); + void DefineConstants(); + std::string DefineGlobalMemoryFunctions(); + void SetupImages(Bindings& bindings); + void SetupTextures(Bindings& bindings); +}; + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp new file mode 100644 index 000000000..8a430d573 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -0,0 +1,252 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <string> +#include <tuple> +#include <type_traits> + +#include "common/div_ceil.h" +#include "common/settings.h" +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" + +namespace Shader::Backend::GLSL { +namespace { +template <class Func> +struct FuncTraits {}; + +template <class ReturnType_, class... Args> +struct FuncTraits<ReturnType_ (*)(Args...)> { + using ReturnType = ReturnType_; + + static constexpr size_t NUM_ARGS = sizeof...(Args); + + template <size_t I> + using ArgType = std::tuple_element_t<I, std::tuple<Args...>>; +}; + +template <auto func, typename... Args> +void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... 
args) { + inst->SetDefinition<Id>(func(ctx, std::forward<Args>(args)...)); +} + +template <typename ArgType> +auto Arg(EmitContext& ctx, const IR::Value& arg) { + if constexpr (std::is_same_v<ArgType, std::string_view>) { + return ctx.var_alloc.Consume(arg); + } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) { + return arg; + } else if constexpr (std::is_same_v<ArgType, u32>) { + return arg.U32(); + } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) { + return arg.Attribute(); + } else if constexpr (std::is_same_v<ArgType, IR::Patch>) { + return arg.Patch(); + } else if constexpr (std::is_same_v<ArgType, IR::Reg>) { + return arg.Reg(); + } +} + +template <auto func, bool is_first_arg_inst, size_t... I> +void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) { + using Traits = FuncTraits<decltype(func)>; + if constexpr (std::is_same_v<typename Traits::ReturnType, Id>) { + if constexpr (is_first_arg_inst) { + SetDefinition<func>( + ctx, inst, *inst, + Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...); + } else { + SetDefinition<func>( + ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...); + } + } else { + if constexpr (is_first_arg_inst) { + func(ctx, *inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...); + } else { + func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...); + } + } +} + +template <auto func> +void Invoke(EmitContext& ctx, IR::Inst* inst) { + using Traits = FuncTraits<decltype(func)>; + static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments"); + if constexpr (Traits::NUM_ARGS == 1) { + Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{}); + } else { + using FirstArgType = typename Traits::template ArgType<1>; + static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst&>; + using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>; + Invoke<func, is_first_arg_inst>(ctx, inst, Indices{}); + } +} + +void EmitInst(EmitContext& ctx, IR::Inst* inst) { + switch (inst->GetOpcode()) { +#define OPCODE(name, result_type, ...) 
\ + case IR::Opcode::name: \ + return Invoke<&Emit##name>(ctx, inst); +#include "shader_recompiler/frontend/ir/opcodes.inc" +#undef OPCODE + } + throw LogicError("Invalid opcode {}", inst->GetOpcode()); +} + +bool IsReference(IR::Inst& inst) { + return inst.GetOpcode() == IR::Opcode::Reference; +} + +void PrecolorInst(IR::Inst& phi) { + // Insert phi moves before references to avoid overwritting other phis + const size_t num_args{phi.NumArgs()}; + for (size_t i = 0; i < num_args; ++i) { + IR::Block& phi_block{*phi.PhiBlock(i)}; + auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()}; + IR::IREmitter ir{phi_block, it}; + const IR::Value arg{phi.Arg(i)}; + if (arg.IsImmediate()) { + ir.PhiMove(phi, arg); + } else { + ir.PhiMove(phi, IR::Value{arg.InstRecursive()}); + } + } + for (size_t i = 0; i < num_args; ++i) { + IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi}); + } +} + +void Precolor(const IR::Program& program) { + for (IR::Block* const block : program.blocks) { + for (IR::Inst& phi : block->Instructions()) { + if (!IR::IsPhi(phi)) { + break; + } + PrecolorInst(phi); + } + } +} + +void EmitCode(EmitContext& ctx, const IR::Program& program) { + for (const IR::AbstractSyntaxNode& node : program.syntax_list) { + switch (node.type) { + case IR::AbstractSyntaxNode::Type::Block: + for (IR::Inst& inst : node.data.block->Instructions()) { + EmitInst(ctx, &inst); + } + break; + case IR::AbstractSyntaxNode::Type::If: + ctx.Add("if({}){{", ctx.var_alloc.Consume(node.data.if_node.cond)); + break; + case IR::AbstractSyntaxNode::Type::EndIf: + ctx.Add("}}"); + break; + case IR::AbstractSyntaxNode::Type::Break: + if (node.data.break_node.cond.IsImmediate()) { + if (node.data.break_node.cond.U1()) { + ctx.Add("break;"); + } + } else { + ctx.Add("if({}){{break;}}", ctx.var_alloc.Consume(node.data.break_node.cond)); + } + break; + case IR::AbstractSyntaxNode::Type::Return: + case IR::AbstractSyntaxNode::Type::Unreachable: + ctx.Add("return;"); + break; + case IR::AbstractSyntaxNode::Type::Loop: + ctx.Add("for(;;){{"); + break; + case IR::AbstractSyntaxNode::Type::Repeat: + if (Settings::values.disable_shader_loop_safety_checks) { + ctx.Add("if(!{}){{break;}}}}", ctx.var_alloc.Consume(node.data.repeat.cond)); + } else { + ctx.Add("if(--loop{}<0 || !{}){{break;}}}}", ctx.num_safety_loop_vars++, + ctx.var_alloc.Consume(node.data.repeat.cond)); + } + break; + default: + throw NotImplementedException("AbstractSyntaxNode Type {}", node.type); + } + } +} + +std::string GlslVersionSpecifier(const EmitContext& ctx) { + if (ctx.uses_y_direction || ctx.info.stores.Legacy() || ctx.info.loads.Legacy()) { + return " compatibility"; + } + return ""; +} + +bool IsPreciseType(GlslVarType type) { + switch (type) { + case GlslVarType::PrecF32: + case GlslVarType::PrecF64: + return true; + default: + return false; + } +} + +void DefineVariables(const EmitContext& ctx, std::string& header) { + for (u32 i = 0; i < static_cast<u32>(GlslVarType::Void); ++i) { + const auto type{static_cast<GlslVarType>(i)}; + const auto& tracker{ctx.var_alloc.GetUseTracker(type)}; + const auto type_name{ctx.var_alloc.GetGlslType(type)}; + const bool has_precise_bug{ctx.stage == Stage::Fragment && ctx.profile.has_gl_precise_bug}; + const auto precise{!has_precise_bug && IsPreciseType(type) ? 
"precise " : ""}; + // Temps/return types that are never used are stored at index 0 + if (tracker.uses_temp) { + header += fmt::format("{}{} t{}={}(0);", precise, type_name, + ctx.var_alloc.Representation(0, type), type_name); + } + for (u32 index = 0; index < tracker.num_used; ++index) { + header += fmt::format("{}{} {}={}(0);", precise, type_name, + ctx.var_alloc.Representation(index, type), type_name); + } + } + for (u32 i = 0; i < ctx.num_safety_loop_vars; ++i) { + header += fmt::format("int loop{}=0x2000;", i); + } +} +} // Anonymous namespace + +std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program, + Bindings& bindings) { + EmitContext ctx{program, bindings, profile, runtime_info}; + Precolor(program); + EmitCode(ctx, program); + const std::string version{fmt::format("#version 450{}\n", GlslVersionSpecifier(ctx))}; + ctx.header.insert(0, version); + if (program.shared_memory_size > 0) { + const auto requested_size{program.shared_memory_size}; + const auto max_size{profile.gl_max_compute_smem_size}; + const bool needs_clamp{requested_size > max_size}; + if (needs_clamp) { + LOG_WARNING(Shader_GLSL, "Requested shared memory size ({}) exceeds device limit ({})", + requested_size, max_size); + } + const auto smem_size{needs_clamp ? max_size : requested_size}; + ctx.header += fmt::format("shared uint smem[{}];", Common::DivCeil(smem_size, 4U)); + } + ctx.header += "void main(){\n"; + if (program.local_memory_size > 0) { + ctx.header += fmt::format("uint lmem[{}];", Common::DivCeil(program.local_memory_size, 4U)); + } + DefineVariables(ctx, ctx.header); + if (ctx.uses_cc_carry) { + ctx.header += "uint carry;"; + } + if (program.info.uses_subgroup_shuffles) { + ctx.header += "bool shfl_in_bounds;"; + } + ctx.code.insert(0, ctx.header); + ctx.code += '}'; + return ctx.code; +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.h b/src/shader_recompiler/backend/glsl/emit_glsl.h new file mode 100644 index 000000000..20e5719e6 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl.h @@ -0,0 +1,24 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <string> + +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" + +namespace Shader::Backend::GLSL { + +[[nodiscard]] std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program, Bindings& bindings); + +[[nodiscard]] inline std::string EmitGLSL(const Profile& profile, IR::Program& program) { + Bindings binding; + return EmitGLSL(profile, {}, program, binding); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp new file mode 100644 index 000000000..772acc5a4 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp @@ -0,0 +1,418 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +namespace { +constexpr char cas_loop[]{ + "for (;;){{uint old={};{}=atomicCompSwap({},old,{}({},{}));if({}==old){{break;}}}}"}; + +void SharedCasFunction(EmitContext& ctx, IR::Inst& inst, std::string_view offset, + std::string_view value, std::string_view function) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + const std::string smem{fmt::format("smem[{}>>2]", offset)}; + ctx.Add(cas_loop, smem, ret, smem, function, smem, value, ret); +} + +void SsboCasFunction(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value, std::string_view function) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + const std::string ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset))}; + ctx.Add(cas_loop, ssbo, ret, ssbo, function, ssbo, value, ret); +} + +void SsboCasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value, + std::string_view function) { + const std::string ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset))}; + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + ctx.Add(cas_loop, ssbo, ret, ssbo, function, ssbo, value, ret); + ctx.AddF32("{}=utof({});", inst, ret); +} +} // Anonymous namespace + +void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicAdd(smem[{}>>2],{});", inst, pointer_offset, value); +} + +void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + const std::string u32_value{fmt::format("uint({})", value)}; + SharedCasFunction(ctx, inst, pointer_offset, u32_value, "CasMinS32"); +} + +void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicMin(smem[{}>>2],{});", inst, pointer_offset, value); +} + +void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + const std::string u32_value{fmt::format("uint({})", value)}; + SharedCasFunction(ctx, inst, pointer_offset, u32_value, "CasMaxS32"); +} + +void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicMax(smem[{}>>2],{});", inst, pointer_offset, value); +} + +void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + SharedCasFunction(ctx, inst, pointer_offset, value, "CasIncrement"); +} + +void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + SharedCasFunction(ctx, inst, pointer_offset, value, "CasDecrement"); +} + +void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicAnd(smem[{}>>2],{});", inst, pointer_offset, value); +} + +void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicOr(smem[{}>>2],{});", inst, pointer_offset, 
value); +} + +void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicXor(smem[{}>>2],{});", inst, pointer_offset, value); +} + +void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicExchange(smem[{}>>2],{});", inst, pointer_offset, value); +} + +void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU64("{}=packUint2x32(uvec2(smem[{}>>2],smem[({}+4)>>2]));", inst, pointer_offset, + pointer_offset); + ctx.Add("smem[{}>>2]=unpackUint2x32({}).x;smem[({}+4)>>2]=unpackUint2x32({}).y;", + pointer_offset, value, pointer_offset, value); +} + +void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicAdd({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + const std::string u32_value{fmt::format("uint({})", value)}; + SsboCasFunction(ctx, inst, binding, offset, u32_value, "CasMinS32"); +} + +void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicMin({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + const std::string u32_value{fmt::format("uint({})", value)}; + SsboCasFunction(ctx, inst, binding, offset, u32_value, "CasMaxS32"); +} + +void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicMax({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunction(ctx, inst, binding, offset, value, "CasIncrement"); +} + +void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunction(ctx, inst, binding, offset, value, "CasDecrement"); +} + +void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicAnd({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicOr({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicXor({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); +} + +void 
EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicExchange({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset)); + ctx.Add("{}_ssbo{}[{}>>2]+=unpackUint2x32({}).x;{}_ssbo{}[({}>>2)+1]+=unpackUint2x32({}).y;", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset)); + ctx.Add("for(int i=0;i<2;++i){{ " + "{}_ssbo{}[({}>>2)+i]=uint(min(int({}_ssbo{}[({}>>2)+i]),unpackInt2x32(int64_t({}))[i])" + ");}}", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset)); + ctx.Add("for(int i=0;i<2;++i){{ " + "{}_ssbo{}[({}>>2)+i]=min({}_ssbo{}[({}>>2)+i],unpackUint2x32(uint64_t({}))[i]);}}", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset)); + ctx.Add("for(int i=0;i<2;++i){{ " + "{}_ssbo{}[({}>>2)+i]=uint(max(int({}_ssbo{}[({}>>2)+i]),unpackInt2x32(int64_t({}))[i])" + ");}}", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset)); + 
ctx.Add("for(int " + "i=0;i<2;++i){{{}_ssbo{}[({}>>2)+i]=max({}_ssbo{}[({}>>2)+i],unpackUint2x32(uint64_t({}" + "))[i]);}}", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU64( + "{}=packUint2x32(uvec2(atomicAnd({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicAnd({}_" + "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));", + inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU64("{}=packUint2x32(uvec2(atomicOr({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicOr({}_" + "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));", + inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU64( + "{}=packUint2x32(uvec2(atomicXor({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicXor({}_" + "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));", + inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU64("{}=packUint2x32(uvec2(atomicExchange({}_ssbo{}[{}>>2],unpackUint2x32({}).x)," + "atomicExchange({}_ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));", + inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunctionF32(ctx, inst, binding, offset, value, "CasFloatAdd"); +} + +void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatAdd16x2"); +} + +void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatAdd32x2"); +} + +void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMin16x2"); +} + +void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMin32x2"); +} + +void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMax16x2"); +} + +void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + 
SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMax32x2");
+}
+
+void EmitGlobalAtomicIAdd32(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicSMin32(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicUMin32(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicSMax32(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicUMax32(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicInc32(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicDec32(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicAnd32(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicOr32(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicXor32(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicExchange32(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicIAdd64(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicSMin64(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicUMin64(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicSMax64(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicUMax64(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicInc64(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicDec64(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicAnd64(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicOr64(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicXor64(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicExchange64(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicAddF32(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicAddF16x2(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicAddF32x2(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicMinF16x2(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicMinF32x2(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicMaxF16x2(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicMaxF32x2(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp
new file mode 100644
index 000000000..e1d1b558e
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp
@@ -0,0 +1,21 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
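One note before the barrier emitters: the 64-bit storage atomics in emit_glsl_atomic.cpp above have no native GLSL path here, so helpers such as EmitStorageAtomicIAdd64 log a warning and emit a plain, non-atomic read-modify-write over two 32-bit SSBO words, packing the previous value with packUint2x32. A minimal sketch of that expansion, assuming a compute stage, binding 0, and hypothetical VarAlloc names t1 (offset), t2 (value) and t3 (result):

// Sketch only (not part of the diff): formats the two statements that
// EmitStorageAtomicIAdd64 adds; cs_ssbo0, t1, t2 and t3 are placeholders.
#include <cstdio>
#include <string>
#include <fmt/format.h>

int main() {
    const std::string stage{"cs"}, offset{"t1"}, value{"t2"}, result{"t3"};
    const unsigned binding{0};
    // Old value: pack the low and high 32-bit words into the 64-bit result.
    std::puts(fmt::format("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", result,
                          stage, binding, offset, stage, binding, offset)
                  .c_str());
    // Non-atomic add of the two halves of the incoming value.
    std::puts(fmt::format("{}_ssbo{}[{}>>2]+=unpackUint2x32({}).x;"
                          "{}_ssbo{}[({}>>2)+1]+=unpackUint2x32({}).y;",
                          stage, binding, offset, value, stage, binding, offset, value)
                  .c_str());
}

EmitSharedAtomicExchange64 above takes the same non-atomic approach against smem.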
+ +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +void EmitBarrier(EmitContext& ctx) { + ctx.Add("barrier();"); +} + +void EmitWorkgroupMemoryBarrier(EmitContext& ctx) { + ctx.Add("groupMemoryBarrier();"); +} + +void EmitDeviceMemoryBarrier(EmitContext& ctx) { + ctx.Add("memoryBarrier();"); +} +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp new file mode 100644 index 000000000..3c1714e89 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp @@ -0,0 +1,94 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +namespace { +void Alias(IR::Inst& inst, const IR::Value& value) { + if (value.IsImmediate()) { + return; + } + IR::Inst& value_inst{*value.InstRecursive()}; + value_inst.DestructiveAddUsage(inst.UseCount()); + value_inst.DestructiveRemoveUsage(); + inst.SetDefinition(value_inst.Definition<Id>()); +} +} // Anonymous namespace + +void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + +void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { + // Fake one usage to get a real variable out of the condition + inst.DestructiveAddUsage(1); + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U1)}; + const auto input{ctx.var_alloc.Consume(value)}; + if (ret != input) { + ctx.Add("{}={};", ret, input); + } +} + +void EmitBitCastU16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) { + NotImplemented(); +} + +void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=ftou({});", inst, value); +} + +void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=doubleBitsToUint64({});", inst, value); +} + +void EmitBitCastF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) { + NotImplemented(); +} + +void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=utof({});", inst, value); +} + +void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=uint64BitsToDouble({});", inst, value); +} + +void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=packUint2x32({});", inst, value); +} + +void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32x2("{}=unpackUint2x32({});", inst, value); +} + +void EmitPackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=packFloat2x16({});", inst, value); +} + +void EmitUnpackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF16x2("{}=unpackFloat2x16({});", inst, value); +} + +void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=packHalf2x16({});", inst, value); +} + +void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + 
ctx.AddF32x2("{}=unpackHalf2x16({});", inst, value); +} + +void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=packDouble2x32({});", inst, value); +} + +void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32x2("{}=unpackDouble2x32({});", inst, value); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp new file mode 100644 index 000000000..49a66e3ec --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp @@ -0,0 +1,219 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +namespace { +constexpr std::string_view SWIZZLE{"xyzw"}; +void CompositeInsert(EmitContext& ctx, std::string_view result, std::string_view composite, + std::string_view object, u32 index) { + if (result == composite) { + // The result is aliased with the composite + ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); + } else { + ctx.Add("{}={};{}.{}={};", result, composite, result, SWIZZLE[index], object); + } +} +} // Anonymous namespace + +void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2) { + ctx.AddU32x2("{}=uvec2({},{});", inst, e1, e2); +} + +void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3) { + ctx.AddU32x3("{}=uvec3({},{},{});", inst, e1, e2, e3); +} + +void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3, std::string_view e4) { + ctx.AddU32x4("{}=uvec4({},{},{},{});", inst, e1, e2, e3, e4); +} + +void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { + ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]); +} + +void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { + ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]); +} + +void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { + ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]); +} + +void EmitCompositeInsertU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)}; + CompositeInsert(ctx, ret, composite, object, index); +} + +void EmitCompositeInsertU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x3)}; + CompositeInsert(ctx, ret, composite, object, index); +} + +void EmitCompositeInsertU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x4)}; + CompositeInsert(ctx, ret, composite, object, index); +} + +void EmitCompositeConstructF16x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view e1, + [[maybe_unused]] std::string_view e2) { + NotImplemented(); +} 
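The CompositeInsert helper near the top of this file chooses between two emissions depending on whether the destination variable aliases the composite operand, skipping the copy when they are the same. A small sketch of the two shapes, using hypothetical VarAlloc names t1, t2 and t3:

// Sketch only (not part of the diff): the two GLSL forms CompositeInsert can
// produce; t1, t2 and t3 are placeholder names, not real allocator output.
#include <cstdio>
#include <string>
#include <string_view>
#include <fmt/format.h>

int main() {
    constexpr std::string_view SWIZZLE{"xyzw"};
    const std::string composite{"t1"}, object{"t2"}, result{"t3"};
    const unsigned index{2};
    // Result aliases the composite: overwrite the selected element in place.
    std::puts(fmt::format("{}.{}={};", composite, SWIZZLE[index], object).c_str());
    // Distinct result: copy the whole composite, then overwrite one element.
    std::puts(
        fmt::format("{}={};{}.{}={};", result, composite, result, SWIZZLE[index], object).c_str());
}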
+ +void EmitCompositeConstructF16x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view e1, + [[maybe_unused]] std::string_view e2, + [[maybe_unused]] std::string_view e3) { + NotImplemented(); +} + +void EmitCompositeConstructF16x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view e1, + [[maybe_unused]] std::string_view e2, + [[maybe_unused]] std::string_view e3, + [[maybe_unused]] std::string_view e4) { + NotImplemented(); +} + +void EmitCompositeExtractF16x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] u32 index) { + NotImplemented(); +} + +void EmitCompositeExtractF16x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] u32 index) { + NotImplemented(); +} + +void EmitCompositeExtractF16x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF16x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] std::string_view object, + [[maybe_unused]] u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF16x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] std::string_view object, + [[maybe_unused]] u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF16x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] std::string_view object, + [[maybe_unused]] u32 index) { + NotImplemented(); +} + +void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2) { + ctx.AddF32x2("{}=vec2({},{});", inst, e1, e2); +} + +void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3) { + ctx.AddF32x3("{}=vec3({},{},{});", inst, e1, e2, e3); +} + +void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3, std::string_view e4) { + ctx.AddF32x4("{}=vec4({},{},{},{});", inst, e1, e2, e3, e4); +} + +void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { + ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]); +} + +void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { + ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]); +} + +void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { + ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]); +} + +void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32x2)}; + CompositeInsert(ctx, ret, composite, object, index); +} + +void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32x3)}; + CompositeInsert(ctx, ret, composite, object, index); +} + +void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; + CompositeInsert(ctx, ret, composite, object, index); +} + +void 
EmitCompositeConstructF64x2([[maybe_unused]] EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeConstructF64x3([[maybe_unused]] EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeConstructF64x4([[maybe_unused]] EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeExtractF64x2([[maybe_unused]] EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeExtractF64x3([[maybe_unused]] EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeExtractF64x4([[maybe_unused]] EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); +} + +void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); +} + +void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); +} +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp new file mode 100644 index 000000000..580063fa9 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -0,0 +1,456 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" + +namespace Shader::Backend::GLSL { +namespace { +constexpr char SWIZZLE[]{"xyzw"}; + +u32 CbufIndex(u32 offset) { + return (offset / 4) % 4; +} + +char OffsetSwizzle(u32 offset) { + return SWIZZLE[CbufIndex(offset)]; +} + +bool IsInputArray(Stage stage) { + return stage == Stage::Geometry || stage == Stage::TessellationControl || + stage == Stage::TessellationEval; +} + +std::string InputVertexIndex(EmitContext& ctx, std::string_view vertex) { + return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : ""; +} + +std::string_view OutputVertexIndex(EmitContext& ctx) { + return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : ""; +} + +void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding, + const IR::Value& offset, u32 num_bits, std::string_view cast = {}, + std::string_view bit_offset = {}) { + const bool is_immediate{offset.IsImmediate()}; + const bool component_indexing_bug{!is_immediate && ctx.profile.has_gl_component_indexing_bug}; + if (is_immediate) { + const s32 signed_offset{static_cast<s32>(offset.U32())}; + static constexpr u32 cbuf_size{0x10000}; + if (signed_offset < 0 || offset.U32() > cbuf_size) { + LOG_WARNING(Shader_GLSL, "Immediate constant buffer offset is out of bounds"); + ctx.Add("{}=0u;", ret); + return; + } + } + const auto offset_var{ctx.var_alloc.Consume(offset)}; + const auto index{is_immediate ? fmt::format("{}", offset.U32() / 16) + : fmt::format("{}>>4", offset_var)}; + const auto swizzle{is_immediate ? 
fmt::format(".{}", OffsetSwizzle(offset.U32())) + : fmt::format("[({}>>2)%4]", offset_var)}; + + const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; + const auto cbuf_cast{fmt::format("{}({}[{}]{{}})", cast, cbuf, index)}; + const auto extraction{num_bits == 32 ? cbuf_cast + : fmt ::format("bitfieldExtract({},int({}),{})", cbuf_cast, + bit_offset, num_bits)}; + if (!component_indexing_bug) { + const auto result{fmt::format(fmt::runtime(extraction), swizzle)}; + ctx.Add("{}={};", ret, result); + return; + } + const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; + for (u32 i = 0; i < 4; ++i) { + const auto swizzle_string{fmt::format(".{}", "xyzw"[i])}; + const auto result{fmt::format(fmt::runtime(extraction), swizzle_string)}; + ctx.Add("if(({}&3)=={}){}={};", cbuf_offset, i, ret, result); + } +} + +void GetCbuf8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, + std::string_view cast) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + if (offset.IsImmediate()) { + const auto bit_offset{fmt::format("{}", (offset.U32() % 4) * 8)}; + GetCbuf(ctx, ret, binding, offset, 8, cast, bit_offset); + } else { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + const auto bit_offset{fmt::format("({}%4)*8", offset_var)}; + GetCbuf(ctx, ret, binding, offset, 8, cast, bit_offset); + } +} + +void GetCbuf16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, + std::string_view cast) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + if (offset.IsImmediate()) { + const auto bit_offset{fmt::format("{}", ((offset.U32() / 2) % 2) * 16)}; + GetCbuf(ctx, ret, binding, offset, 16, cast, bit_offset); + } else { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + const auto bit_offset{fmt::format("(({}>>1)%2)*16", offset_var)}; + GetCbuf(ctx, ret, binding, offset, 16, cast, bit_offset); + } +} + +u32 TexCoordIndex(IR::Attribute attr) { + return (static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4; +} +} // Anonymous namespace + +void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + GetCbuf8(ctx, inst, binding, offset, "ftou"); +} + +void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + GetCbuf8(ctx, inst, binding, offset, "ftoi"); +} + +void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + GetCbuf16(ctx, inst, binding, offset, "ftou"); +} + +void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + GetCbuf16(ctx, inst, binding, offset, "ftoi"); +} + +void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + GetCbuf(ctx, ret, binding, offset, 32, "ftou"); +} + +void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32)}; + GetCbuf(ctx, ret, binding, offset, 32); +} + +void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; + if (offset.IsImmediate()) { + static constexpr u32 cbuf_size{0x10000}; + const u32 u32_offset{offset.U32()}; + const s32 
signed_offset{static_cast<s32>(offset.U32())}; + if (signed_offset < 0 || u32_offset > cbuf_size) { + LOG_WARNING(Shader_GLSL, "Immediate constant buffer offset is out of bounds"); + ctx.AddU32x2("{}=uvec2(0u);", inst); + return; + } + if (u32_offset % 2 == 0) { + ctx.AddU32x2("{}=ftou({}[{}].{}{});", inst, cbuf, u32_offset / 16, + OffsetSwizzle(u32_offset), OffsetSwizzle(u32_offset + 4)); + } else { + ctx.AddU32x2("{}=uvec2(ftou({}[{}].{}),ftou({}[{}].{}));", inst, cbuf, u32_offset / 16, + OffsetSwizzle(u32_offset), cbuf, (u32_offset + 4) / 16, + OffsetSwizzle(u32_offset + 4)); + } + return; + } + const auto offset_var{ctx.var_alloc.Consume(offset)}; + if (!ctx.profile.has_gl_component_indexing_bug) { + ctx.AddU32x2("{}=uvec2(ftou({}[{}>>4][({}>>2)%4]),ftou({}[({}+4)>>4][(({}+4)>>2)%4]));", + inst, cbuf, offset_var, offset_var, cbuf, offset_var, offset_var); + return; + } + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)}; + const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; + for (u32 swizzle = 0; swizzle < 4; ++swizzle) { + ctx.Add("if(({}&3)=={}){}=uvec2(ftou({}[{}>>4].{}),ftou({}[({}+4)>>4].{}));", cbuf_offset, + swizzle, ret, cbuf, offset_var, "xyzw"[swizzle], cbuf, offset_var, + "xyzw"[(swizzle + 1) % 4]); + } +} + +void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, + std::string_view vertex) { + const u32 element{static_cast<u32>(attr) % 4}; + const char swizzle{"xyzw"[element]}; + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + if (!ctx.runtime_info.previous_stage_stores.Generic(index, element)) { + if (element == 3) { + ctx.AddF32("{}=1.f;", inst, attr); + } else { + ctx.AddF32("{}=0.f;", inst, attr); + } + return; + } + ctx.AddF32("{}=in_attr{}{}.{};", inst, index, InputVertexIndex(ctx, vertex), swizzle); + return; + } + // GLSL only exposes 8 legacy texcoords + if (attr >= IR::Attribute::FixedFncTexture8S && attr <= IR::Attribute::FixedFncTexture9Q) { + LOG_WARNING(Shader_GLSL, "GLSL does not allow access to gl_TexCoord[{}]", + TexCoordIndex(attr)); + ctx.AddF32("{}=0.f;", inst); + return; + } + if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture7Q) { + const u32 index{TexCoordIndex(attr)}; + ctx.AddF32("{}=gl_TexCoord[{}].{};", inst, index, swizzle); + return; + } + switch (attr) { + case IR::Attribute::PrimitiveId: + ctx.AddF32("{}=itof(gl_PrimitiveID);", inst); + break; + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: { + const bool is_array{IsInputArray(ctx.stage)}; + const auto input_decorator{is_array ? 
fmt::format("gl_in[{}].", vertex) : ""}; + ctx.AddF32("{}={}{}.{};", inst, input_decorator, ctx.position_name, swizzle); + break; + } + case IR::Attribute::ColorFrontDiffuseR: + case IR::Attribute::ColorFrontDiffuseG: + case IR::Attribute::ColorFrontDiffuseB: + case IR::Attribute::ColorFrontDiffuseA: + if (ctx.stage == Stage::Fragment) { + ctx.AddF32("{}=gl_Color.{};", inst, swizzle); + } else { + ctx.AddF32("{}=gl_FrontColor.{};", inst, swizzle); + } + break; + case IR::Attribute::PointSpriteS: + case IR::Attribute::PointSpriteT: + ctx.AddF32("{}=gl_PointCoord.{};", inst, swizzle); + break; + case IR::Attribute::TessellationEvaluationPointU: + case IR::Attribute::TessellationEvaluationPointV: + ctx.AddF32("{}=gl_TessCoord.{};", inst, swizzle); + break; + case IR::Attribute::InstanceId: + ctx.AddF32("{}=itof(gl_InstanceID);", inst); + break; + case IR::Attribute::VertexId: + ctx.AddF32("{}=itof(gl_VertexID);", inst); + break; + case IR::Attribute::FrontFace: + ctx.AddF32("{}=itof(gl_FrontFacing?-1:0);", inst); + break; + default: + throw NotImplementedException("Get attribute {}", attr); + } +} + +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, + [[maybe_unused]] std::string_view vertex) { + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + const u32 attr_element{IR::GenericAttributeElement(attr)}; + const GenericElementInfo& info{ctx.output_generics.at(index).at(attr_element)}; + const auto output_decorator{OutputVertexIndex(ctx)}; + if (info.num_components == 1) { + ctx.Add("{}{}={};", info.name, output_decorator, value); + } else { + const u32 index_element{attr_element - info.first_element}; + ctx.Add("{}{}.{}={};", info.name, output_decorator, "xyzw"[index_element], value); + } + return; + } + const u32 element{static_cast<u32>(attr) % 4}; + const char swizzle{"xyzw"[element]}; + // GLSL only exposes 8 legacy texcoords + if (attr >= IR::Attribute::FixedFncTexture8S && attr <= IR::Attribute::FixedFncTexture9Q) { + LOG_WARNING(Shader_GLSL, "GLSL does not allow access to gl_TexCoord[{}]", + TexCoordIndex(attr)); + return; + } + if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture7Q) { + const u32 index{TexCoordIndex(attr)}; + ctx.Add("gl_TexCoord[{}].{}={};", index, swizzle, value); + return; + } + switch (attr) { + case IR::Attribute::Layer: + if (ctx.stage != Stage::Geometry && + !ctx.profile.support_viewport_index_layer_non_geometry) { + LOG_WARNING(Shader_GLSL, "Shader stores viewport layer but device does not support " + "viewport layer extension"); + break; + } + ctx.Add("gl_Layer=ftoi({});", value); + break; + case IR::Attribute::ViewportIndex: + if (ctx.stage != Stage::Geometry && + !ctx.profile.support_viewport_index_layer_non_geometry) { + LOG_WARNING(Shader_GLSL, "Shader stores viewport index but device does not support " + "viewport layer extension"); + break; + } + ctx.Add("gl_ViewportIndex=ftoi({});", value); + break; + case IR::Attribute::ViewportMask: + if (ctx.stage != Stage::Geometry && !ctx.profile.support_viewport_mask) { + LOG_WARNING( + Shader_GLSL, + "Shader stores viewport mask but device does not support viewport mask extension"); + break; + } + ctx.Add("gl_ViewportMask[0]=ftoi({});", value); + break; + case IR::Attribute::PointSize: + ctx.Add("gl_PointSize={};", value); + break; + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + ctx.Add("gl_Position.{}={};", swizzle, value); + 
break; + case IR::Attribute::ColorFrontDiffuseR: + case IR::Attribute::ColorFrontDiffuseG: + case IR::Attribute::ColorFrontDiffuseB: + case IR::Attribute::ColorFrontDiffuseA: + ctx.Add("gl_FrontColor.{}={};", swizzle, value); + break; + case IR::Attribute::ColorFrontSpecularR: + case IR::Attribute::ColorFrontSpecularG: + case IR::Attribute::ColorFrontSpecularB: + case IR::Attribute::ColorFrontSpecularA: + ctx.Add("gl_FrontSecondaryColor.{}={};", swizzle, value); + break; + case IR::Attribute::ColorBackDiffuseR: + case IR::Attribute::ColorBackDiffuseG: + case IR::Attribute::ColorBackDiffuseB: + case IR::Attribute::ColorBackDiffuseA: + ctx.Add("gl_BackColor.{}={};", swizzle, value); + break; + case IR::Attribute::ColorBackSpecularR: + case IR::Attribute::ColorBackSpecularG: + case IR::Attribute::ColorBackSpecularB: + case IR::Attribute::ColorBackSpecularA: + ctx.Add("gl_BackSecondaryColor.{}={};", swizzle, value); + break; + case IR::Attribute::FogCoordinate: + ctx.Add("gl_FogFragCoord={};", value); + break; + case IR::Attribute::ClipDistance0: + case IR::Attribute::ClipDistance1: + case IR::Attribute::ClipDistance2: + case IR::Attribute::ClipDistance3: + case IR::Attribute::ClipDistance4: + case IR::Attribute::ClipDistance5: + case IR::Attribute::ClipDistance6: + case IR::Attribute::ClipDistance7: { + const u32 index{static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::ClipDistance0)}; + ctx.Add("gl_ClipDistance[{}]={};", index, value); + break; + } + default: + throw NotImplementedException("Set attribute {}", attr); + } +} + +void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, std::string_view offset, + std::string_view vertex) { + const bool is_array{ctx.stage == Stage::Geometry}; + const auto vertex_arg{is_array ? fmt::format(",{}", vertex) : ""}; + ctx.AddF32("{}=IndexedAttrLoad(int({}){});", inst, offset, vertex_arg); +} + +void EmitSetAttributeIndexed([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view offset, + [[maybe_unused]] std::string_view value, + [[maybe_unused]] std::string_view vertex) { + NotImplemented(); +} + +void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch) { + if (!IR::IsGeneric(patch)) { + throw NotImplementedException("Non-generic patch load"); + } + const u32 index{IR::GenericPatchIndex(patch)}; + const u32 element{IR::GenericPatchElement(patch)}; + const char swizzle{"xyzw"[element]}; + ctx.AddF32("{}=patch{}.{};", inst, index, swizzle); +} + +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value) { + if (IR::IsGeneric(patch)) { + const u32 index{IR::GenericPatchIndex(patch)}; + const u32 element{IR::GenericPatchElement(patch)}; + ctx.Add("patch{}.{}={};", index, "xyzw"[element], value); + return; + } + switch (patch) { + case IR::Patch::TessellationLodLeft: + case IR::Patch::TessellationLodRight: + case IR::Patch::TessellationLodTop: + case IR::Patch::TessellationLodBottom: { + const u32 index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)}; + ctx.Add("gl_TessLevelOuter[{}]={};", index, value); + break; + } + case IR::Patch::TessellationLodInteriorU: + ctx.Add("gl_TessLevelInner[0]={};", value); + break; + case IR::Patch::TessellationLodInteriorV: + ctx.Add("gl_TessLevelInner[1]={};", value); + break; + default: + throw NotImplementedException("Patch {}", patch); + } +} + +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value) { + const char swizzle{"xyzw"[component]}; + ctx.Add("frag_color{}.{}={};", index, swizzle, value); +} + +void 
EmitSetSampleMask(EmitContext& ctx, std::string_view value) { + ctx.Add("gl_SampleMask[0]=int({});", value); +} + +void EmitSetFragDepth(EmitContext& ctx, std::string_view value) { + ctx.Add("gl_FragDepth={};", value); +} + +void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32x3("{}=gl_LocalInvocationID;", inst); +} + +void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32x3("{}=gl_WorkGroupID;", inst); +} + +void EmitInvocationId(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=uint(gl_InvocationID);", inst); +} + +void EmitSampleId(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=uint(gl_SampleID);", inst); +} + +void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU1("{}=gl_HelperInvocation;", inst); +} + +void EmitYDirection(EmitContext& ctx, IR::Inst& inst) { + ctx.uses_y_direction = true; + ctx.AddF32("{}=gl_FrontMaterial.ambient.a;", inst); +} + +void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset) { + ctx.AddU32("{}=lmem[{}];", inst, word_offset); +} + +void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value) { + ctx.Add("lmem[{}]={};", word_offset, value); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp new file mode 100644 index 000000000..53f8896be --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp @@ -0,0 +1,21 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/exception.h" + +namespace Shader::Backend::GLSL { + +void EmitJoin(EmitContext&) { + throw NotImplementedException("Join shouldn't be emitted"); +} + +void EmitDemoteToHelperInvocation(EmitContext& ctx) { + ctx.Add("discard;"); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp new file mode 100644 index 000000000..eeae6562c --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp @@ -0,0 +1,230 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +void EmitConvertS16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=(int({})&0xffff)|(bitfieldExtract(int({}),31,1)<<15);", inst, value, value); +} + +void EmitConvertS16F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertS32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=int({});", inst, value); +} + +void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=int({});", inst, value); +} + +void EmitConvertS64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=int64_t({});", inst, value); +} + +void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=int64_t({});", inst, value); +} + +void EmitConvertU16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertU16F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertU16F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertU32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=uint({});", inst, value); +} + +void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=uint({});", inst, value); +} + +void EmitConvertU64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=uint64_t({});", inst, value); +} + +void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=uint64_t({});", inst, value); +} + +void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=uint64_t({});", inst, value); +} + +void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=uint({});", inst, value); +} + +void EmitConvertF16F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void 
EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=float({});", inst, value); +} + +void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=double({});", inst, value); +} + +void EmitConvertF16S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16S32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16S64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16U32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16U64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=float(int({}));", inst, value); +} + +void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=float(int64_t({}));", inst, value); +} + +void EmitConvertF32U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=float({}&0xffff);", inst, value); +} + +void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=float({});", inst, value); +} + +void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=float({});", inst, value); +} + +void EmitConvertF64S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=double(int({}));", inst, value); +} + +void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=double(int64_t({}));", inst, value); +} + +void EmitConvertF64U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64U16([[maybe_unused]] EmitContext& ctx, 
[[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=double({});", inst, value); +} + +void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=double({});", inst, value); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp new file mode 100644 index 000000000..d423bfb1b --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp @@ -0,0 +1,456 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +namespace { +void Compare(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs, + std::string_view op, bool ordered) { + const auto nan_op{ordered ? "&&!" : "||"}; + ctx.AddU1("{}={}{}{}" + "{}isnan({}){}isnan({});", + inst, lhs, op, rhs, nan_op, lhs, nan_op, rhs); +} + +bool IsPrecise(const IR::Inst& inst) { + return inst.Flags<IR::FpControl>().no_contraction; +} +} // Anonymous namespace + +void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=abs({});", inst, value); +} + +void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=abs({});", inst, value); +} + +void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { + NotImplemented(); +} + +void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + if (IsPrecise(inst)) { + ctx.AddPrecF32("{}={}+{};", inst, a, b); + } else { + ctx.AddF32("{}={}+{};", inst, a, b); + } +} + +void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + if (IsPrecise(inst)) { + ctx.AddPrecF64("{}={}+{};", inst, a, b); + } else { + ctx.AddF64("{}={}+{};", inst, a, b); + } +} + +void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b, + [[maybe_unused]] std::string_view c) { + NotImplemented(); +} + +void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, + std::string_view c) { + if (IsPrecise(inst)) { + ctx.AddPrecF32("{}=fma({},{},{});", inst, a, b, c); + } else { + ctx.AddF32("{}=fma({},{},{});", inst, a, b, c); + } +} + +void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, + std::string_view c) { + if (IsPrecise(inst)) { + ctx.AddPrecF64("{}=fma({},{},{});", inst, a, b, c); + } else { + ctx.AddF64("{}=fma({},{},{});", inst, a, b, c); + } +} + +void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddF32("{}=max({},{});", inst, a, b); +} + +void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, std::string_view 
a, std::string_view b) { + ctx.AddF64("{}=max({},{});", inst, a, b); +} + +void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddF32("{}=min({},{});", inst, a, b); +} + +void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddF64("{}=min({},{});", inst, a, b); +} + +void EmitFPMul16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { + NotImplemented(); +} + +void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + if (IsPrecise(inst)) { + ctx.AddPrecF32("{}={}*{};", inst, a, b); + } else { + ctx.AddF32("{}={}*{};", inst, a, b); + } +} + +void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + if (IsPrecise(inst)) { + ctx.AddPrecF64("{}={}*{};", inst, a, b); + } else { + ctx.AddF64("{}={}*{};", inst, a, b); + } +} + +void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=-({});", inst, value); +} + +void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=-({});", inst, value); +} + +void EmitFPSin(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=sin({});", inst, value); +} + +void EmitFPCos(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=cos({});", inst, value); +} + +void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=exp2({});", inst, value); +} + +void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=log2({});", inst, value); +} + +void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=(1.0f)/{};", inst, value); +} + +void EmitFPRecip64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=1.0/{};", inst, value); +} + +void EmitFPRecipSqrt32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF32("{}=inversesqrt({});", inst, value); +} + +void EmitFPRecipSqrt64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=sqrt({});", inst, value); +} + +void EmitFPSaturate16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=min(max({},0.0),1.0);", inst, value); +} + +void EmitFPSaturate64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=min(max({},0.0),1.0);", inst, value); +} + +void EmitFPClamp16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value, + [[maybe_unused]] std::string_view min_value, + [[maybe_unused]] std::string_view max_value) { + NotImplemented(); +} + +void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view min_value, std::string_view max_value) { + // GLSL's clamp does not produce desirable results + 
ctx.AddF32("{}=min(max({},float({})),float({}));", inst, value, min_value, max_value); +} + +void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view min_value, std::string_view max_value) { + // GLSL's clamp does not produce desirable results + ctx.AddF64("{}=min(max({},double({})),double({}));", inst, value, min_value, max_value); +} + +void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=roundEven({});", inst, value); +} + +void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=roundEven({});", inst, value); +} + +void EmitFPFloor16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=floor({});", inst, value); +} + +void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=floor({});", inst, value); +} + +void EmitFPCeil16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=ceil({});", inst, value); +} + +void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=ceil({});", inst, value); +} + +void EmitFPTrunc16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=trunc({});", inst, value); +} + +void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=trunc({});", inst, value); +} + +void EmitFPOrdEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "==", true); +} + +void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "==", true); +} + +void EmitFPUnordEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "==", false); +} + +void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "==", false); +} + +void EmitFPOrdNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "!=", true); +} + +void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "!=", true); +} + +void EmitFPUnordNotEqual16([[maybe_unused]] EmitContext& 
ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "!=", false); +} + +void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "!=", false); +} + +void EmitFPOrdLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<", true); +} + +void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<", true); +} + +void EmitFPUnordLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<", false); +} + +void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<", false); +} + +void EmitFPOrdGreaterThan16([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">", true); +} + +void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">", true); +} + +void EmitFPUnordGreaterThan16([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">", false); +} + +void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">", false); +} + +void EmitFPOrdLessThanEqual16([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<=", true); +} + +void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<=", true); +} + +void EmitFPUnordLessThanEqual16([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<=", false); +} + +void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<=", false); +} + +void EmitFPOrdGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + 
NotImplemented(); +} + +void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">=", true); +} + +void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">=", true); +} + +void EmitFPUnordGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">=", false); +} + +void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">=", false); +} + +void EmitFPIsNan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU1("{}=isnan({});", inst, value); +} + +void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU1("{}=isnan({});", inst, value); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp new file mode 100644 index 000000000..447eb8e0a --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -0,0 +1,799 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLSL { +namespace { +std::string Texture(EmitContext& ctx, const IR::TextureInstInfo& info, const IR::Value& index) { + const auto def{info.type == TextureType::Buffer ? ctx.texture_buffers.at(info.descriptor_index) + : ctx.textures.at(info.descriptor_index)}; + const auto index_offset{def.count > 1 ? fmt::format("[{}]", ctx.var_alloc.Consume(index)) : ""}; + return fmt::format("tex{}{}", def.binding, index_offset); +} + +std::string Image(EmitContext& ctx, const IR::TextureInstInfo& info, const IR::Value& index) { + const auto def{info.type == TextureType::Buffer ? ctx.image_buffers.at(info.descriptor_index) + : ctx.images.at(info.descriptor_index)}; + const auto index_offset{def.count > 1 ? 
fmt::format("[{}]", ctx.var_alloc.Consume(index)) : ""}; + return fmt::format("img{}{}", def.binding, index_offset); +} + +std::string CastToIntVec(std::string_view value, const IR::TextureInstInfo& info) { + switch (info.type) { + case TextureType::Color1D: + case TextureType::Buffer: + return fmt::format("int({})", value); + case TextureType::ColorArray1D: + case TextureType::Color2D: + case TextureType::ColorArray2D: + return fmt::format("ivec2({})", value); + case TextureType::Color3D: + case TextureType::ColorCube: + return fmt::format("ivec3({})", value); + case TextureType::ColorArrayCube: + return fmt::format("ivec4({})", value); + default: + throw NotImplementedException("Integer cast for TextureType {}", info.type.Value()); + } +} + +std::string CoordsCastToInt(std::string_view value, const IR::TextureInstInfo& info) { + switch (info.type) { + case TextureType::Color1D: + case TextureType::Buffer: + return fmt::format("int({})", value); + case TextureType::ColorArray1D: + case TextureType::Color2D: + return fmt::format("ivec2({})", value); + case TextureType::ColorArray2D: + case TextureType::Color3D: + case TextureType::ColorCube: + return fmt::format("ivec3({})", value); + case TextureType::ColorArrayCube: + return fmt::format("ivec4({})", value); + default: + throw NotImplementedException("TexelFetchCast type {}", info.type.Value()); + } +} + +bool NeedsShadowLodExt(TextureType type) { + switch (type) { + case TextureType::ColorArray2D: + case TextureType::ColorCube: + case TextureType::ColorArrayCube: + return true; + default: + return false; + } +} + +std::string GetOffsetVec(EmitContext& ctx, const IR::Value& offset) { + if (offset.IsImmediate()) { + return fmt::format("int({})", offset.U32()); + } + IR::Inst* const inst{offset.InstRecursive()}; + if (inst->AreAllArgsImmediates()) { + switch (inst->GetOpcode()) { + case IR::Opcode::CompositeConstructU32x2: + return fmt::format("ivec2({},{})", inst->Arg(0).U32(), inst->Arg(1).U32()); + case IR::Opcode::CompositeConstructU32x3: + return fmt::format("ivec3({},{},{})", inst->Arg(0).U32(), inst->Arg(1).U32(), + inst->Arg(2).U32()); + case IR::Opcode::CompositeConstructU32x4: + return fmt::format("ivec4({},{},{},{})", inst->Arg(0).U32(), inst->Arg(1).U32(), + inst->Arg(2).U32(), inst->Arg(3).U32()); + default: + break; + } + } + const bool has_var_aoffi{ctx.profile.support_gl_variable_aoffi}; + if (!has_var_aoffi) { + LOG_WARNING(Shader_GLSL, "Device does not support variable texture offsets, STUBBING"); + } + const auto offset_str{has_var_aoffi ? 
ctx.var_alloc.Consume(offset) : "0"}; + switch (offset.Type()) { + case IR::Type::U32: + return fmt::format("int({})", offset_str); + case IR::Type::U32x2: + return fmt::format("ivec2({})", offset_str); + case IR::Type::U32x3: + return fmt::format("ivec3({})", offset_str); + case IR::Type::U32x4: + return fmt::format("ivec4({})", offset_str); + default: + throw NotImplementedException("Offset type {}", offset.Type()); + } +} + +std::string PtpOffsets(const IR::Value& offset, const IR::Value& offset2) { + const std::array values{offset.InstRecursive(), offset2.InstRecursive()}; + if (!values[0]->AreAllArgsImmediates() || !values[1]->AreAllArgsImmediates()) { + LOG_WARNING(Shader_GLSL, "Not all arguments in PTP are immediate, STUBBING"); + return "ivec2[](ivec2(0), ivec2(1), ivec2(2), ivec2(3))"; + } + const IR::Opcode opcode{values[0]->GetOpcode()}; + if (opcode != values[1]->GetOpcode() || opcode != IR::Opcode::CompositeConstructU32x4) { + throw LogicError("Invalid PTP arguments"); + } + auto read{[&](unsigned int a, unsigned int b) { return values[a]->Arg(b).U32(); }}; + + return fmt::format("ivec2[](ivec2({},{}),ivec2({},{}),ivec2({},{}),ivec2({},{}))", read(0, 0), + read(0, 1), read(0, 2), read(0, 3), read(1, 0), read(1, 1), read(1, 2), + read(1, 3)); +} + +IR::Inst* PrepareSparse(IR::Inst& inst) { + const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + if (sparse_inst) { + sparse_inst->Invalidate(); + } + return sparse_inst; +} +} // Anonymous namespace + +void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view bias_lc, + const IR::Value& offset) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + if (info.has_lod_clamp) { + throw NotImplementedException("EmitImageSampleImplicitLod Lod clamp samples"); + } + const auto texture{Texture(ctx, info, index)}; + const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""}; + const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; + const auto sparse_inst{PrepareSparse(inst)}; + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. 
STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + if (!sparse_inst || !supports_sparse) { + if (!offset.IsEmpty()) { + const auto offset_str{GetOffsetVec(ctx, offset)}; + if (ctx.stage == Stage::Fragment) { + ctx.Add("{}=textureOffset({},{},{}{});", texel, texture, coords, offset_str, bias); + } else { + ctx.Add("{}=textureLodOffset({},{},0.0,{});", texel, texture, coords, offset_str); + } + } else { + if (ctx.stage == Stage::Fragment) { + ctx.Add("{}=texture({},{}{});", texel, texture, coords, bias); + } else { + ctx.Add("{}=textureLod({},{},0.0);", texel, texture, coords); + } + } + return; + } + if (!offset.IsEmpty()) { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureOffsetARB({},{},{},{}{}));", + *sparse_inst, texture, coords, GetOffsetVec(ctx, offset), texel, bias); + } else { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureARB({},{},{}{}));", *sparse_inst, + texture, coords, texel, bias); + } +} + +void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view lod_lc, + const IR::Value& offset) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + if (info.has_bias) { + throw NotImplementedException("EmitImageSampleExplicitLod Bias texture samples"); + } + if (info.has_lod_clamp) { + throw NotImplementedException("EmitImageSampleExplicitLod Lod clamp samples"); + } + const auto texture{Texture(ctx, info, index)}; + const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; + const auto sparse_inst{PrepareSparse(inst)}; + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + if (!sparse_inst || !supports_sparse) { + if (!offset.IsEmpty()) { + ctx.Add("{}=textureLodOffset({},{},{},{});", texel, texture, coords, lod_lc, + GetOffsetVec(ctx, offset)); + } else { + ctx.Add("{}=textureLod({},{},{});", texel, texture, coords, lod_lc); + } + return; + } + if (!offset.IsEmpty()) { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));", + *sparse_inst, texture, CastToIntVec(coords, info), lod_lc, + GetOffsetVec(ctx, offset), texel); + } else { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureLodARB({},{},{},{}));", *sparse_inst, + texture, coords, lod_lc, texel); + } +} + +void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view dref, + std::string_view bias_lc, const IR::Value& offset) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto sparse_inst{PrepareSparse(inst)}; + if (sparse_inst) { + throw NotImplementedException("EmitImageSampleDrefImplicitLod Sparse texture samples"); + } + if (info.has_bias) { + throw NotImplementedException("EmitImageSampleDrefImplicitLod Bias texture samples"); + } + if (info.has_lod_clamp) { + throw NotImplementedException("EmitImageSampleDrefImplicitLod Lod clamp samples"); + } + const auto texture{Texture(ctx, info, index)}; + const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""}; + const bool needs_shadow_ext{NeedsShadowLodExt(info.type)}; + const auto cast{needs_shadow_ext ? "vec4" : "vec3"}; + const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod && + ctx.stage != Stage::Fragment && needs_shadow_ext}; + if (use_grad) { + LOG_WARNING(Shader_GLSL, + "Device lacks GL_EXT_texture_shadow_lod. 
Using textureGrad fallback"); + if (info.type == TextureType::ColorArrayCube) { + LOG_WARNING(Shader_GLSL, "textureGrad does not support ColorArrayCube. Stubbing"); + ctx.AddF32("{}=0.0f;", inst); + return; + } + const auto d_cast{info.type == TextureType::ColorArray2D ? "vec2" : "vec3"}; + ctx.AddF32("{}=textureGrad({},{}({},{}),{}(0),{}(0));", inst, texture, cast, coords, dref, + d_cast, d_cast); + return; + } + if (!offset.IsEmpty()) { + const auto offset_str{GetOffsetVec(ctx, offset)}; + if (ctx.stage == Stage::Fragment) { + ctx.AddF32("{}=textureOffset({},{}({},{}),{}{});", inst, texture, cast, coords, dref, + offset_str, bias); + } else { + ctx.AddF32("{}=textureLodOffset({},{}({},{}),0.0,{});", inst, texture, cast, coords, + dref, offset_str); + } + } else { + if (ctx.stage == Stage::Fragment) { + if (info.type == TextureType::ColorArrayCube) { + ctx.AddF32("{}=texture({},vec4({}),{});", inst, texture, coords, dref); + } else { + ctx.AddF32("{}=texture({},{}({},{}){});", inst, texture, cast, coords, dref, bias); + } + } else { + ctx.AddF32("{}=textureLod({},{}({},{}),0.0);", inst, texture, cast, coords, dref); + } + } +} + +void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view dref, + std::string_view lod_lc, const IR::Value& offset) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto sparse_inst{PrepareSparse(inst)}; + if (sparse_inst) { + throw NotImplementedException("EmitImageSampleDrefExplicitLod Sparse texture samples"); + } + if (info.has_bias) { + throw NotImplementedException("EmitImageSampleDrefExplicitLod Bias texture samples"); + } + if (info.has_lod_clamp) { + throw NotImplementedException("EmitImageSampleDrefExplicitLod Lod clamp samples"); + } + const auto texture{Texture(ctx, info, index)}; + const bool needs_shadow_ext{NeedsShadowLodExt(info.type)}; + const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod && needs_shadow_ext}; + const auto cast{needs_shadow_ext ? "vec4" : "vec3"}; + if (use_grad) { + LOG_WARNING(Shader_GLSL, + "Device lacks GL_EXT_texture_shadow_lod. Using textureGrad fallback"); + if (info.type == TextureType::ColorArrayCube) { + LOG_WARNING(Shader_GLSL, "textureGrad does not support ColorArrayCube. Stubbing"); + ctx.AddF32("{}=0.0f;", inst); + return; + } + const auto d_cast{info.type == TextureType::ColorArray2D ? 
"vec2" : "vec3"}; + ctx.AddF32("{}=textureGrad({},{}({},{}),{}(0),{}(0));", inst, texture, cast, coords, dref, + d_cast, d_cast); + return; + } + if (!offset.IsEmpty()) { + const auto offset_str{GetOffsetVec(ctx, offset)}; + if (info.type == TextureType::ColorArrayCube) { + ctx.AddF32("{}=textureLodOffset({},{},{},{},{});", inst, texture, coords, dref, lod_lc, + offset_str); + } else { + ctx.AddF32("{}=textureLodOffset({},{}({},{}),{},{});", inst, texture, cast, coords, + dref, lod_lc, offset_str); + } + } else { + if (info.type == TextureType::ColorArrayCube) { + ctx.AddF32("{}=textureLod({},{},{},{});", inst, texture, coords, dref, lod_lc); + } else { + ctx.AddF32("{}=textureLod({},{}({},{}),{});", inst, texture, cast, coords, dref, + lod_lc); + } + } +} + +void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto texture{Texture(ctx, info, index)}; + const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; + const auto sparse_inst{PrepareSparse(inst)}; + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + if (!sparse_inst || !supports_sparse) { + if (offset.IsEmpty()) { + ctx.Add("{}=textureGather({},{},int({}));", texel, texture, coords, + info.gather_component); + return; + } + if (offset2.IsEmpty()) { + ctx.Add("{}=textureGatherOffset({},{},{},int({}));", texel, texture, coords, + GetOffsetVec(ctx, offset), info.gather_component); + return; + } + // PTP + const auto offsets{PtpOffsets(offset, offset2)}; + ctx.Add("{}=textureGatherOffsets({},{},{},int({}));", texel, texture, coords, offsets, + info.gather_component); + return; + } + if (offset.IsEmpty()) { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherARB({},{},{},int({})));", + *sparse_inst, texture, coords, texel, info.gather_component); + return; + } + if (offset2.IsEmpty()) { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},{},int({})));", + *sparse_inst, texture, CastToIntVec(coords, info), GetOffsetVec(ctx, offset), + texel, info.gather_component); + return; + } + // PTP + const auto offsets{PtpOffsets(offset, offset2)}; + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},{},int({})));", + *sparse_inst, texture, CastToIntVec(coords, info), offsets, texel, + info.gather_component); +} + +void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2, + std::string_view dref) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto texture{Texture(ctx, info, index)}; + const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; + const auto sparse_inst{PrepareSparse(inst)}; + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. 
STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + if (!sparse_inst || !supports_sparse) { + if (offset.IsEmpty()) { + ctx.Add("{}=textureGather({},{},{});", texel, texture, coords, dref); + return; + } + if (offset2.IsEmpty()) { + ctx.Add("{}=textureGatherOffset({},{},{},{});", texel, texture, coords, dref, + GetOffsetVec(ctx, offset)); + return; + } + // PTP + const auto offsets{PtpOffsets(offset, offset2)}; + ctx.Add("{}=textureGatherOffsets({},{},{},{});", texel, texture, coords, dref, offsets); + return; + } + if (offset.IsEmpty()) { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherARB({},{},{},{}));", *sparse_inst, + texture, coords, dref, texel); + return; + } + if (offset2.IsEmpty()) { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},,{},{}));", + *sparse_inst, texture, CastToIntVec(coords, info), dref, + GetOffsetVec(ctx, offset), texel); + return; + } + // PTP + const auto offsets{PtpOffsets(offset, offset2)}; + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},,{},{}));", + *sparse_inst, texture, CastToIntVec(coords, info), dref, offsets, texel); +} + +void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view offset, std::string_view lod, + [[maybe_unused]] std::string_view ms) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + if (info.has_bias) { + throw NotImplementedException("EmitImageFetch Bias texture samples"); + } + if (info.has_lod_clamp) { + throw NotImplementedException("EmitImageFetch Lod clamp samples"); + } + const auto texture{Texture(ctx, info, index)}; + const auto sparse_inst{PrepareSparse(inst)}; + const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. 
STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + if (!sparse_inst || !supports_sparse) { + if (!offset.empty()) { + ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture, + CoordsCastToInt(coords, info), lod, CoordsCastToInt(offset, info)); + } else { + if (info.type == TextureType::Buffer) { + ctx.Add("{}=texelFetch({},int({}));", texel, texture, coords); + } else { + ctx.Add("{}=texelFetch({},{},int({}));", texel, texture, + CoordsCastToInt(coords, info), lod); + } + } + return; + } + if (!offset.empty()) { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));", + *sparse_inst, texture, CastToIntVec(coords, info), lod, + CastToIntVec(offset, info), texel); + } else { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchARB({},{},int({}),{}));", + *sparse_inst, texture, CastToIntVec(coords, info), lod, texel); + } +} + +void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view lod) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto texture{Texture(ctx, info, index)}; + switch (info.type) { + case TextureType::Color1D: + return ctx.AddU32x4( + "{}=uvec4(uint(textureSize({},int({}))),0u,0u,uint(textureQueryLevels({})));", inst, + texture, lod, texture); + case TextureType::ColorArray1D: + case TextureType::Color2D: + case TextureType::ColorCube: + return ctx.AddU32x4( + "{}=uvec4(uvec2(textureSize({},int({}))),0u,uint(textureQueryLevels({})));", inst, + texture, lod, texture); + case TextureType::ColorArray2D: + case TextureType::Color3D: + case TextureType::ColorArrayCube: + return ctx.AddU32x4( + "{}=uvec4(uvec3(textureSize({},int({}))),uint(textureQueryLevels({})));", inst, texture, + lod, texture); + case TextureType::Buffer: + throw NotImplementedException("EmitImageQueryDimensions Texture buffers"); + } + throw LogicError("Unspecified image type {}", info.type.Value()); +} + +void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto texture{Texture(ctx, info, index)}; + return ctx.AddF32x4("{}=vec4(textureQueryLod({},{}),0.0,0.0);", inst, texture, coords); +} + +void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, const IR::Value& derivatives, + const IR::Value& offset, [[maybe_unused]] const IR::Value& lod_clamp) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + if (info.has_lod_clamp) { + throw NotImplementedException("EmitImageGradient Lod clamp samples"); + } + const auto sparse_inst{PrepareSparse(inst)}; + if (sparse_inst) { + throw NotImplementedException("EmitImageGradient Sparse"); + } + if (!offset.IsEmpty()) { + throw NotImplementedException("EmitImageGradient offset"); + } + const auto texture{Texture(ctx, info, index)}; + const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; + const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; + const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)}; + if (multi_component) { + ctx.Add("{}=textureGrad({},{},vec2({}.xz),vec2({}.yz));", texel, texture, coords, + derivatives_vec, derivatives_vec); + } else { + ctx.Add("{}=textureGrad({},{},float({}.x),float({}.y));", texel, texture, coords, + derivatives_vec, derivatives_vec); + } +} + +void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const 
auto sparse_inst{PrepareSparse(inst)}; + if (sparse_inst) { + throw NotImplementedException("EmitImageRead Sparse"); + } + const auto image{Image(ctx, info, index)}; + ctx.AddU32x4("{}=uvec4(imageLoad({},{}));", inst, image, CoordsCastToInt(coords, info)); +} + +void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view color) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto image{Image(ctx, info, index)}; + ctx.Add("imageStore({},{},{});", image, CoordsCastToInt(coords, info), color); +} + +void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicAdd({},{},{});", inst, image, CoordsCastToInt(coords, info), value); +} + +void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicMin({},{},int({}));", inst, image, CoordsCastToInt(coords, info), + value); +} + +void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicMin({},{},uint({}));", inst, image, CoordsCastToInt(coords, info), + value); +} + +void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicMax({},{},int({}));", inst, image, CoordsCastToInt(coords, info), + value); +} + +void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicMax({},{},uint({}));", inst, image, CoordsCastToInt(coords, info), + value); +} + +void EmitImageAtomicInc32(EmitContext&, IR::Inst&, const IR::Value&, std::string_view, + std::string_view) { + NotImplemented(); +} + +void EmitImageAtomicDec32(EmitContext&, IR::Inst&, const IR::Value&, std::string_view, + std::string_view) { + NotImplemented(); +} + +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicAnd({},{},{});", inst, image, CoordsCastToInt(coords, info), value); +} + +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicOr({},{},{});", inst, image, CoordsCastToInt(coords, info), value); +} + +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicXor({},{},{});", inst, image, 
CoordsCastToInt(coords, info), value); +} + +void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicExchange({},{},{});", inst, image, CoordsCastToInt(coords, info), + value); +} + +void EmitBindlessImageSampleImplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageSampleExplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageGather(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageGatherDref(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageFetch(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageQueryDimensions(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageQueryLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageGradient(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageRead(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageWrite(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageSampleImplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageSampleExplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageSampleDrefImplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageSampleDrefExplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageGather(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageGatherDref(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageFetch(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageQueryDimensions(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageQueryLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageGradient(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageRead(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageWrite(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicIAdd32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicSMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicUMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicSMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicUMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicInc32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicDec32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicAnd32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicOr32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicXor32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicExchange32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicIAdd32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicSMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicUMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicSMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicUMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicInc32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicDec32(EmitContext&) { + NotImplemented(); +} + 
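Illustrative aside, not part of the patch: the EmitImageAtomic* emitters defined a little earlier in this file each append one formatted GLSL statement, and they pick the signed or unsigned imageAtomic overload by casting the value operand. A minimal standalone sketch of how those format strings expand, with invented binding and SSA names standing in for what the variable allocator and CoordsCastToInt would produce:

// Standalone sketch of the GLSL these emitters append; assumes only {fmt}.
#include <fmt/format.h>

int main() {
    const char* inst = "ssa_9";            // result variable (invented name)
    const char* image = "img2";            // image binding (invented name)
    const char* coords = "ivec2(coords)";  // stand-in for CoordsCastToInt(...) on a 2D image
    const char* value = "ssa_8";

    // EmitImageAtomicIAdd32 -> ssa_9=imageAtomicAdd(img2,ivec2(coords),ssa_8);
    fmt::print("{}=imageAtomicAdd({},{},{});\n", inst, image, coords, value);
    // EmitImageAtomicSMin32 casts the value to int so GLSL selects the signed overload:
    fmt::print("{}=imageAtomicMin({},{},int({}));\n", inst, image, coords, value);
    // EmitImageAtomicUMax32 casts to uint for the unsigned overload:
    fmt::print("{}=imageAtomicMax({},{},uint({}));\n", inst, image, coords, value);
    return 0;
}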
+void EmitBoundImageAtomicAnd32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicOr32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicXor32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicExchange32(EmitContext&) { + NotImplemented(); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h new file mode 100644 index 000000000..5936d086f --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -0,0 +1,702 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <string_view> + +#include "common/common_types.h" + +namespace Shader::IR { +enum class Attribute : u64; +enum class Patch : u64; +class Inst; +class Value; +} // namespace Shader::IR + +namespace Shader::Backend::GLSL { +class EmitContext; + +#define NotImplemented() throw NotImplementedException("GLSL instruction {}", __func__) + +// Microinstruction emitters +void EmitPhi(EmitContext& ctx, IR::Inst& inst); +void EmitVoid(EmitContext& ctx); +void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitReference(EmitContext& ctx, const IR::Value& value); +void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value); +void EmitJoin(EmitContext& ctx); +void EmitDemoteToHelperInvocation(EmitContext& ctx); +void EmitBarrier(EmitContext& ctx); +void EmitWorkgroupMemoryBarrier(EmitContext& ctx); +void EmitDeviceMemoryBarrier(EmitContext& ctx); +void EmitPrologue(EmitContext& ctx); +void EmitEpilogue(EmitContext& ctx); +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream); +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); +void EmitGetRegister(EmitContext& ctx); +void EmitSetRegister(EmitContext& ctx); +void EmitGetPred(EmitContext& ctx); +void EmitSetPred(EmitContext& ctx); +void EmitSetGotoVariable(EmitContext& ctx); +void EmitGetGotoVariable(EmitContext& ctx); +void EmitSetIndirectBranchVariable(EmitContext& ctx); +void EmitGetIndirectBranchVariable(EmitContext& ctx); +void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, + std::string_view vertex); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, + std::string_view vertex); +void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, std::string_view offset, + std::string_view vertex); +void EmitSetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view value, + std::string_view vertex); +void 
EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch); +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value); +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value); +void EmitSetSampleMask(EmitContext& ctx, std::string_view value); +void EmitSetFragDepth(EmitContext& ctx, std::string_view value); +void EmitGetZFlag(EmitContext& ctx); +void EmitGetSFlag(EmitContext& ctx); +void EmitGetCFlag(EmitContext& ctx); +void EmitGetOFlag(EmitContext& ctx); +void EmitSetZFlag(EmitContext& ctx); +void EmitSetSFlag(EmitContext& ctx); +void EmitSetCFlag(EmitContext& ctx); +void EmitSetOFlag(EmitContext& ctx); +void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst); +void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst); +void EmitInvocationId(EmitContext& ctx, IR::Inst& inst); +void EmitSampleId(EmitContext& ctx, IR::Inst& inst); +void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst); +void EmitYDirection(EmitContext& ctx, IR::Inst& inst); +void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset); +void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value); +void EmitUndefU1(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU8(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU16(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU32(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU64(EmitContext& ctx, IR::Inst& inst); +void EmitLoadGlobalU8(EmitContext& ctx); +void EmitLoadGlobalS8(EmitContext& ctx); +void EmitLoadGlobalU16(EmitContext& ctx); +void EmitLoadGlobalS16(EmitContext& ctx); +void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, std::string_view address); +void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, std::string_view address); +void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, std::string_view address); +void EmitWriteGlobalU8(EmitContext& ctx); +void EmitWriteGlobalS8(EmitContext& ctx); +void EmitWriteGlobalU16(EmitContext& ctx); +void EmitWriteGlobalS16(EmitContext& ctx); +void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value); +void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value); +void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value); +void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorageS16(EmitContext& ctx, const 
IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2); +void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3); +void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3, std::string_view e4); +void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); +void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); +void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); +void EmitCompositeInsertU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index); +void EmitCompositeInsertU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index); +void EmitCompositeInsertU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index); +void EmitCompositeConstructF16x2(EmitContext& ctx, std::string_view e1, std::string_view e2); +void EmitCompositeConstructF16x3(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3); +void EmitCompositeConstructF16x4(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3, std::string_view e4); +void EmitCompositeExtractF16x2(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractF16x3(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractF16x4(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeInsertF16x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF16x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF16x4(EmitContext& ctx, std::string_view composite, 
std::string_view object, + u32 index); +void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2); +void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3); +void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3, std::string_view e4); +void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); +void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); +void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); +void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index); +void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index); +void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index); +void EmitCompositeConstructF64x2(EmitContext& ctx); +void EmitCompositeConstructF64x3(EmitContext& ctx); +void EmitCompositeConstructF64x4(EmitContext& ctx); +void EmitCompositeExtractF64x2(EmitContext& ctx); +void EmitCompositeExtractF64x3(EmitContext& ctx); +void EmitCompositeExtractF64x4(EmitContext& ctx); +void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value); +void EmitSelectU8(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectU16(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value); +void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value); +void EmitSelectF16(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value); +void EmitSelectF64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value); +void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst); +void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst); +void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitPackFloat2x16(EmitContext& ctx, IR::Inst& 
inst, std::string_view value); +void EmitUnpackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitGetZeroFromOp(EmitContext& ctx); +void EmitGetSignFromOp(EmitContext& ctx); +void EmitGetCarryFromOp(EmitContext& ctx); +void EmitGetOverflowFromOp(EmitContext& ctx); +void EmitGetSparseFromOp(EmitContext& ctx); +void EmitGetInBoundsFromOp(EmitContext& ctx); +void EmitFPAbs16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, + std::string_view c); +void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, + std::string_view c); +void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, + std::string_view c); +void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPNeg16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPSin(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPCos(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPRecip64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPRecipSqrt64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPSaturate16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPSaturate64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPClamp16(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view min_value, std::string_view max_value); +void 
EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view min_value, std::string_view max_value); +void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view min_value, std::string_view max_value); +void EmitFPRoundEven16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPFloor16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPCeil16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPTrunc16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPOrdEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitFPUnordEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view 
rhs); +void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPIsNan16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift); +void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift); +void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift); +void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift); +void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift); +void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift); +void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view insert, std::string_view offset, std::string_view count); +void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, 
std::string_view base, + std::string_view offset, std::string_view count); +void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view offset, std::string_view count); +void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, + std::string_view max); +void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, + std::string_view max); +void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitULessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitIEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, 
std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const 
IR::Value& offset, std::string_view value); +void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitGlobalAtomicIAdd32(EmitContext& ctx); +void EmitGlobalAtomicSMin32(EmitContext& ctx); +void EmitGlobalAtomicUMin32(EmitContext& ctx); +void EmitGlobalAtomicSMax32(EmitContext& ctx); +void EmitGlobalAtomicUMax32(EmitContext& ctx); +void EmitGlobalAtomicInc32(EmitContext& ctx); +void EmitGlobalAtomicDec32(EmitContext& ctx); +void EmitGlobalAtomicAnd32(EmitContext& ctx); +void EmitGlobalAtomicOr32(EmitContext& ctx); +void EmitGlobalAtomicXor32(EmitContext& ctx); +void EmitGlobalAtomicExchange32(EmitContext& ctx); +void EmitGlobalAtomicIAdd64(EmitContext& ctx); +void EmitGlobalAtomicSMin64(EmitContext& ctx); +void EmitGlobalAtomicUMin64(EmitContext& ctx); +void EmitGlobalAtomicSMax64(EmitContext& ctx); +void EmitGlobalAtomicUMax64(EmitContext& ctx); +void EmitGlobalAtomicInc64(EmitContext& ctx); +void EmitGlobalAtomicDec64(EmitContext& ctx); +void EmitGlobalAtomicAnd64(EmitContext& ctx); +void EmitGlobalAtomicOr64(EmitContext& ctx); +void EmitGlobalAtomicXor64(EmitContext& ctx); +void EmitGlobalAtomicExchange64(EmitContext& ctx); +void EmitGlobalAtomicAddF32(EmitContext& ctx); +void EmitGlobalAtomicAddF16x2(EmitContext& ctx); +void EmitGlobalAtomicAddF32x2(EmitContext& ctx); +void EmitGlobalAtomicMinF16x2(EmitContext& ctx); +void EmitGlobalAtomicMinF32x2(EmitContext& ctx); +void EmitGlobalAtomicMaxF16x2(EmitContext& ctx); +void EmitGlobalAtomicMaxF32x2(EmitContext& ctx); +void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, 
std::string_view value); +void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitBindlessImageSampleImplicitLod(EmitContext&); +void EmitBindlessImageSampleExplicitLod(EmitContext&); +void EmitBindlessImageSampleDrefImplicitLod(EmitContext&); +void EmitBindlessImageSampleDrefExplicitLod(EmitContext&); +void EmitBindlessImageGather(EmitContext&); +void EmitBindlessImageGatherDref(EmitContext&); +void EmitBindlessImageFetch(EmitContext&); +void EmitBindlessImageQueryDimensions(EmitContext&); +void EmitBindlessImageQueryLod(EmitContext&); +void EmitBindlessImageGradient(EmitContext&); +void EmitBindlessImageRead(EmitContext&); +void EmitBindlessImageWrite(EmitContext&); +void EmitBoundImageSampleImplicitLod(EmitContext&); +void EmitBoundImageSampleExplicitLod(EmitContext&); +void EmitBoundImageSampleDrefImplicitLod(EmitContext&); +void EmitBoundImageSampleDrefExplicitLod(EmitContext&); +void EmitBoundImageGather(EmitContext&); +void 
EmitBoundImageGatherDref(EmitContext&); +void EmitBoundImageFetch(EmitContext&); +void EmitBoundImageQueryDimensions(EmitContext&); +void EmitBoundImageQueryLod(EmitContext&); +void EmitBoundImageGradient(EmitContext&); +void EmitBoundImageRead(EmitContext&); +void EmitBoundImageWrite(EmitContext&); +void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view bias_lc, + const IR::Value& offset); +void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view lod_lc, + const IR::Value& offset); +void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view dref, + std::string_view bias_lc, const IR::Value& offset); +void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view dref, + std::string_view lod_lc, const IR::Value& offset); +void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2); +void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2, + std::string_view dref); +void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view offset, std::string_view lod, + std::string_view ms); +void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view lod); +void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords); +void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, const IR::Value& derivatives, + const IR::Value& offset, const IR::Value& lod_clamp); +void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords); +void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view color); +void EmitBindlessImageAtomicIAdd32(EmitContext&); +void EmitBindlessImageAtomicSMin32(EmitContext&); +void EmitBindlessImageAtomicUMin32(EmitContext&); +void EmitBindlessImageAtomicSMax32(EmitContext&); +void EmitBindlessImageAtomicUMax32(EmitContext&); +void EmitBindlessImageAtomicInc32(EmitContext&); +void EmitBindlessImageAtomicDec32(EmitContext&); +void EmitBindlessImageAtomicAnd32(EmitContext&); +void EmitBindlessImageAtomicOr32(EmitContext&); +void EmitBindlessImageAtomicXor32(EmitContext&); +void EmitBindlessImageAtomicExchange32(EmitContext&); +void EmitBoundImageAtomicIAdd32(EmitContext&); +void EmitBoundImageAtomicSMin32(EmitContext&); +void EmitBoundImageAtomicUMin32(EmitContext&); +void EmitBoundImageAtomicSMax32(EmitContext&); +void EmitBoundImageAtomicUMax32(EmitContext&); +void EmitBoundImageAtomicInc32(EmitContext&); +void EmitBoundImageAtomicDec32(EmitContext&); +void EmitBoundImageAtomicAnd32(EmitContext&); +void EmitBoundImageAtomicOr32(EmitContext&); +void EmitBoundImageAtomicXor32(EmitContext&); +void EmitBoundImageAtomicExchange32(EmitContext&); +void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view 
coords, std::string_view value); +void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitLaneId(EmitContext& ctx, IR::Inst& inst); +void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred); +void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred); +void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred); +void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred); +void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst); +void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask); +void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, + std::string_view clamp, std::string_view segmentation_mask); +void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask); +void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask); +void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, std::string_view op_a, std::string_view op_b, + std::string_view swizzle); +void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a); +void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a); +void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a); +void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a); + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp new file mode 100644 index 000000000..38419f88f --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -0,0 +1,253 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
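Illustrative aside, not part of the patch: in the integer emitters that follow, SetZeroFlag and SetSignFlag only add comparisons when the instruction carries a GetZeroFromOp or GetSignFromOp pseudo-op, so a flag-producing bitwise op expands into the operation plus two extra statements; the helpers then invalidate the pseudo-ops. A minimal standalone sketch with invented SSA names:

// Standalone sketch of the GLSL produced by BitwiseLogicalOp plus the flag helpers
// defined below; assumes only {fmt}.
#include <fmt/format.h>

int main() {
    const char* result = "ssa_4"; // what ctx.var_alloc.Define(inst, GlslVarType::U32) might return
    const char* a = "ssa_2";
    const char* b = "ssa_3";
    const char* zero = "ssa_5";   // GetZeroFromOp pseudo-op, if present
    const char* sign = "ssa_6";   // GetSignFromOp pseudo-op, if present

    fmt::print("{}={}{}{};\n", result, a, '&', b);  // the AND itself
    fmt::print("{}={}==0;\n", zero, result);        // SetZeroFlag comparison
    fmt::print("{}=int({})<0;\n", sign, result);    // SetSignFlag comparison
    return 0;
}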
+ +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +namespace { +void SetZeroFlag(EmitContext& ctx, IR::Inst& inst, std::string_view result) { + IR::Inst* const zero{inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)}; + if (!zero) { + return; + } + ctx.AddU1("{}={}==0;", *zero, result); + zero->Invalidate(); +} + +void SetSignFlag(EmitContext& ctx, IR::Inst& inst, std::string_view result) { + IR::Inst* const sign{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)}; + if (!sign) { + return; + } + ctx.AddU1("{}=int({})<0;", *sign, result); + sign->Invalidate(); +} + +void BitwiseLogicalOp(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, + char lop) { + const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + ctx.Add("{}={}{}{};", result, a, lop, b); + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); +} +} // Anonymous namespace + +void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + // Compute the overflow CC first as it requires the original operand values, + // which may be overwritten by the result of the addition + if (IR::Inst * overflow{inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)}) { + // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c + constexpr u32 s32_max{static_cast<u32>(std::numeric_limits<s32>::max())}; + const auto sub_a{fmt::format("{}u-{}", s32_max, a)}; + const auto positive_result{fmt::format("int({})>int({})", b, sub_a)}; + const auto negative_result{fmt::format("int({})<int({})", b, sub_a)}; + ctx.AddU1("{}=int({})>=0?{}:{};", *overflow, a, positive_result, negative_result); + overflow->Invalidate(); + } + const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + if (IR::Inst* const carry{inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp)}) { + ctx.uses_cc_carry = true; + ctx.Add("{}=uaddCarry({},{},carry);", result, a, b); + ctx.AddU1("{}=carry!=0;", *carry); + carry->Invalidate(); + } else { + ctx.Add("{}={}+{};", result, a, b); + } + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); +} + +void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU64("{}={}+{};", inst, a, b); +} + +void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU32("{}={}-{};", inst, a, b); +} + +void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU64("{}={}-{};", inst, a, b); +} + +void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU32("{}=uint({}*{});", inst, a, b); +} + +void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=uint(-({}));", inst, value); +} + +void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=-({});", inst, value); +} + +void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=abs(int({}));", inst, value); +} + +void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift) { + ctx.AddU32("{}={}<<{};", inst, base, shift); +} + +void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift) { + 
ctx.AddU64("{}={}<<{};", inst, base, shift); +} + +void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift) { + ctx.AddU32("{}={}>>{};", inst, base, shift); +} + +void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift) { + ctx.AddU64("{}={}>>{};", inst, base, shift); +} + +void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift) { + ctx.AddU32("{}=int({})>>{};", inst, base, shift); +} + +void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift) { + ctx.AddU64("{}=int64_t({})>>{};", inst, base, shift); +} + +void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + BitwiseLogicalOp(ctx, inst, a, b, '&'); +} + +void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + BitwiseLogicalOp(ctx, inst, a, b, '|'); +} + +void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + BitwiseLogicalOp(ctx, inst, a, b, '^'); +} + +void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view insert, std::string_view offset, std::string_view count) { + ctx.AddU32("{}=bitfieldInsert({},{},int({}),int({}));", inst, base, insert, offset, count); +} + +void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view offset, std::string_view count) { + const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + ctx.Add("{}=uint(bitfieldExtract(int({}),int({}),int({})));", result, base, offset, count); + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); +} + +void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view offset, std::string_view count) { + const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + ctx.Add("{}=uint(bitfieldExtract(uint({}),int({}),int({})));", result, base, offset, count); + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); +} + +void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=bitfieldReverse({});", inst, value); +} + +void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=bitCount({});", inst, value); +} + +void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=~{};", inst, value); +} + +void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=findMSB(int({}));", inst, value); +} + +void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=findMSB(uint({}));", inst, value); +} + +void EmitSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU32("{}=min(int({}),int({}));", inst, a, b); +} + +void EmitUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU32("{}=min(uint({}),uint({}));", inst, a, b); +} + +void EmitSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU32("{}=max(int({}),int({}));", inst, a, b); +} + +void EmitUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU32("{}=max(uint({}),uint({}));", inst, a, b); +} + +void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, 
std::string_view min, + std::string_view max) { + const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + ctx.Add("{}=clamp(int({}),int({}),int({}));", result, value, min, max); + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); +} + +void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, + std::string_view max) { + const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + ctx.Add("{}=clamp(uint({}),uint({}),uint({}));", result, value, min, max); + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); +} + +void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { + ctx.AddU1("{}=int({})<int({});", inst, lhs, rhs); +} + +void EmitULessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { + ctx.AddU1("{}=uint({})<uint({});", inst, lhs, rhs); +} + +void EmitIEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { + ctx.AddU1("{}={}=={};", inst, lhs, rhs); +} + +void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + ctx.AddU1("{}=int({})<=int({});", inst, lhs, rhs); +} + +void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + ctx.AddU1("{}=uint({})<=uint({});", inst, lhs, rhs); +} + +void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + ctx.AddU1("{}=int({})>int({});", inst, lhs, rhs); +} + +void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + ctx.AddU1("{}=uint({})>uint({});", inst, lhs, rhs); +} + +void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { + ctx.AddU1("{}={}!={};", inst, lhs, rhs); +} + +void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + ctx.AddU1("{}=int({})>=int({});", inst, lhs, rhs); +} + +void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + ctx.AddU1("{}=uint({})>=uint({});", inst, lhs, rhs); +} +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp new file mode 100644 index 000000000..338ff4bd6 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp @@ -0,0 +1,28 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { + +void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU1("{}={}||{};", inst, a, b); +} + +void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU1("{}={}&&{};", inst, a, b); +} + +void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU1("{}={}^^{};", inst, a, b); +} + +void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU1("{}=!{};", inst, value); +} +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp new file mode 100644 index 000000000..e3957491f --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp @@ -0,0 +1,202 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLSL { +namespace { +constexpr char cas_loop[]{"for(;;){{uint old_value={};uint " + "cas_result=atomicCompSwap({},old_value,bitfieldInsert({},{},{},{}));" + "if(cas_result==old_value){{break;}}}}"}; + +void SsboWriteCas(EmitContext& ctx, const IR::Value& binding, std::string_view offset_var, + std::string_view value, std::string_view bit_offset, u32 num_bits) { + const auto ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(), offset_var)}; + ctx.Add(cas_loop, ssbo, ssbo, ssbo, value, bit_offset, num_bits); +} +} // Anonymous namespace + +void EmitLoadGlobalU8(EmitContext&) { + NotImplemented(); +} + +void EmitLoadGlobalS8(EmitContext&) { + NotImplemented(); +} + +void EmitLoadGlobalU16(EmitContext&) { + NotImplemented(); +} + +void EmitLoadGlobalS16(EmitContext&) { + NotImplemented(); +} + +void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, std::string_view address) { + if (ctx.profile.support_int64) { + return ctx.AddU32("{}=LoadGlobal32({});", inst, address); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); + ctx.AddU32("{}=0u;", inst); +} + +void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, std::string_view address) { + if (ctx.profile.support_int64) { + return ctx.AddU32x2("{}=LoadGlobal64({});", inst, address); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); + ctx.AddU32x2("{}=uvec2(0);", inst); +} + +void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, std::string_view address) { + if (ctx.profile.support_int64) { + return ctx.AddU32x4("{}=LoadGlobal128({});", inst, address); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); + ctx.AddU32x4("{}=uvec4(0);", inst); +} + +void EmitWriteGlobalU8(EmitContext&) { + NotImplemented(); +} + +void EmitWriteGlobalS8(EmitContext&) { + NotImplemented(); +} + +void EmitWriteGlobalU16(EmitContext&) { + NotImplemented(); +} + +void EmitWriteGlobalS16(EmitContext&) { + NotImplemented(); +} + +void EmitWriteGlobal32(EmitContext& ctx, std::string_view 
address, std::string_view value) { + if (ctx.profile.support_int64) { + return ctx.Add("WriteGlobal32({},{});", address, value); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); +} + +void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value) { + if (ctx.profile.support_int64) { + return ctx.Add("WriteGlobal64({},{});", address, value); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); +} + +void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value) { + if (ctx.profile.support_int64) { + return ctx.Add("WriteGlobal128({},{});", address, value); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); +} + +void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}>>2],int({}%4)*8,8);", inst, ctx.stage_name, + binding.U32(), offset_var, offset_var); +} + +void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.AddU32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int({}%4)*8,8);", inst, ctx.stage_name, + binding.U32(), offset_var, offset_var); +} + +void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}>>2],int(({}>>1)%2)*16,16);", inst, ctx.stage_name, + binding.U32(), offset_var, offset_var); +} + +void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.AddU32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int(({}>>1)%2)*16,16);", inst, + ctx.stage_name, binding.U32(), offset_var, offset_var); +} + +void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.AddU32("{}={}_ssbo{}[{}>>2];", inst, ctx.stage_name, binding.U32(), offset_var); +} + +void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2]);", inst, ctx.stage_name, + binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var); +} + +void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.AddU32x4("{}=uvec4({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2],{}_ssbo{}[({}+8)>>2],{}_ssbo{}[({}" + "+12)>>2]);", + inst, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), + offset_var, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, + binding.U32(), offset_var); +} + +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + const auto bit_offset{fmt::format("int({}%4)*8", offset_var)}; + SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 8); +} + +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + const auto 
offset_var{ctx.var_alloc.Consume(offset)}; + const auto bit_offset{fmt::format("int({}%4)*8", offset_var)}; + SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 8); +} + +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset_var)}; + SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 16); +} + +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset_var)}; + SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 16); +} + +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.Add("{}_ssbo{}[{}>>2]={};", ctx.stage_name, binding.U32(), offset_var, value); +} + +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.Add("{}_ssbo{}[{}>>2]={}.x;", ctx.stage_name, binding.U32(), offset_var, value); + ctx.Add("{}_ssbo{}[({}+4)>>2]={}.y;", ctx.stage_name, binding.U32(), offset_var, value); +} + +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.Add("{}_ssbo{}[{}>>2]={}.x;", ctx.stage_name, binding.U32(), offset_var, value); + ctx.Add("{}_ssbo{}[({}+4)>>2]={}.y;", ctx.stage_name, binding.U32(), offset_var, value); + ctx.Add("{}_ssbo{}[({}+8)>>2]={}.z;", ctx.stage_name, binding.U32(), offset_var, value); + ctx.Add("{}_ssbo{}[({}+12)>>2]={}.w;", ctx.stage_name, binding.U32(), offset_var, value); +} +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp new file mode 100644 index 000000000..f420fe388 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -0,0 +1,105 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
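The 8- and 16-bit storage writes above cannot address the SSBO element directly, because each buffer is exposed as an array of 32-bit uints. SsboWriteCas therefore merges the new bits into the containing word with bitfieldInsert and retries with atomicCompSwap until no other invocation has raced the store. A host-side analogue of one such store (function name illustrative, not part of the patch):

    #include <atomic>
    #include <cstdint>

    // Splice `num_bits` bits of `value` into a shared 32-bit word at
    // `bit_offset`, retrying until the compare-and-swap succeeds.
    // num_bits is 8 or 16 here, so the shifts are well defined.
    void WriteSubWord(std::atomic<std::uint32_t>& word, std::uint32_t value,
                      std::uint32_t bit_offset, std::uint32_t num_bits) {
        for (;;) {
            std::uint32_t old_value = word.load();
            const std::uint32_t mask = ((1u << num_bits) - 1u) << bit_offset; // bitfieldInsert mask
            const std::uint32_t merged = (old_value & ~mask) | ((value << bit_offset) & mask);
            if (word.compare_exchange_weak(old_value, merged)) {
                break;
            }
        }
    }

A byte at byte offset off uses bit_offset int(off%4)*8 with num_bits 8, which is exactly what EmitWriteStorageU8 passes to SsboWriteCas.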
+ +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +#ifdef _MSC_VER +#pragma warning(disable : 4100) +#endif + +namespace Shader::Backend::GLSL { + +void EmitGetRegister(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetRegister(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetPred(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetPred(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetGotoVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetGotoVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetIndirectBranchVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetIndirectBranchVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetZFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetSFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetCFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetOFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetZFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetSFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetCFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetOFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetZeroFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetSignFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetCarryFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetOverflowFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetSparseFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetInBoundsFromOp(EmitContext& ctx) { + NotImplemented(); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp new file mode 100644 index 000000000..49fba9073 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp @@ -0,0 +1,55 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value) { + ctx.AddU1("{}={}?{}:{};", inst, cond, true_value, false_value); +} + +void EmitSelectU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, + [[maybe_unused]] std::string_view true_value, + [[maybe_unused]] std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, + [[maybe_unused]] std::string_view true_value, + [[maybe_unused]] std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value) { + ctx.AddU32("{}={}?{}:{};", inst, cond, true_value, false_value); +} + +void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value) { + ctx.AddU64("{}={}?{}:{};", inst, cond, true_value, false_value); +} + +void EmitSelectF16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, + [[maybe_unused]] std::string_view true_value, + [[maybe_unused]] std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value) { + ctx.AddF32("{}={}?{}:{};", inst, cond, true_value, false_value); +} + +void EmitSelectF64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value) { + ctx.AddF64("{}={}?{}:{};", inst, cond, true_value, false_value); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp new file mode 100644 index 000000000..518b78f06 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp @@ -0,0 +1,79 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
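Sub-word loads follow the same addressing scheme as the storage loads earlier: the smem array addressed below is indexed in 32-bit words, so a byte at offset off lives in word off>>2 at bit (off%4)*8 and a halfword at bit ((off>>1)%2)*16, extracted with bitfieldExtract (sign-extending for the signed variants). The 8-bit case as a host-side sketch (function names illustrative):

    #include <cstdint>

    std::uint32_t ExtractU8(std::uint32_t word, std::uint32_t byte_offset) {
        return (word >> ((byte_offset % 4) * 8)) & 0xffu;
    }

    std::int32_t ExtractS8(std::uint32_t word, std::uint32_t byte_offset) {
        // Narrowing to int8_t and widening back sign-extends the byte.
        return static_cast<std::int8_t>(ExtractU8(word, byte_offset));
    }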
+ +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +namespace { +constexpr char cas_loop[]{"for(;;){{uint old_value={};uint " + "cas_result=atomicCompSwap({},old_value,bitfieldInsert({},{},{},{}));" + "if(cas_result==old_value){{break;}}}}"}; + +void SharedWriteCas(EmitContext& ctx, std::string_view offset, std::string_view value, + std::string_view bit_offset, u32 num_bits) { + const auto smem{fmt::format("smem[{}>>2]", offset)}; + ctx.Add(cas_loop, smem, smem, smem, value, bit_offset, num_bits); +} +} // Anonymous namespace + +void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { + ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int({}%4)*8,8);", inst, offset, offset); +} + +void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { + ctx.AddU32("{}=bitfieldExtract(int(smem[{}>>2]),int({}%4)*8,8);", inst, offset, offset); +} + +void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { + ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int(({}>>1)%2)*16,16);", inst, offset, offset); +} + +void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { + ctx.AddU32("{}=bitfieldExtract(int(smem[{}>>2]),int(({}>>1)%2)*16,16);", inst, offset, offset); +} + +void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { + ctx.AddU32("{}=smem[{}>>2];", inst, offset); +} + +void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { + ctx.AddU32x2("{}=uvec2(smem[{}>>2],smem[({}+4)>>2]);", inst, offset, offset); +} + +void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { + ctx.AddU32x4("{}=uvec4(smem[{}>>2],smem[({}+4)>>2],smem[({}+8)>>2],smem[({}+12)>>2]);", inst, + offset, offset, offset, offset); +} + +void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value) { + const auto bit_offset{fmt::format("int({}%4)*8", offset)}; + SharedWriteCas(ctx, offset, value, bit_offset, 8); +} + +void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value) { + const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset)}; + SharedWriteCas(ctx, offset, value, bit_offset, 16); +} + +void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value) { + ctx.Add("smem[{}>>2]={};", offset, value); +} + +void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value) { + ctx.Add("smem[{}>>2]={}.x;", offset, value); + ctx.Add("smem[({}+4)>>2]={}.y;", offset, value); +} + +void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value) { + ctx.Add("smem[{}>>2]={}.x;", offset, value); + ctx.Add("smem[({}+4)>>2]={}.y;", offset, value); + ctx.Add("smem[({}+8)>>2]={}.z;", offset, value); + ctx.Add("smem[({}+12)>>2]={}.w;", offset, value); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp new file mode 100644 index 000000000..9b866f889 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp @@ -0,0 +1,111 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
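InitializeOutputVaryings below zero-fills every generic output the shader stores to, except that whichever element lands on the w component is initialized to 1, so an attribute the guest never writes still reads back as (0, 0, 0, 1). Collected into a single helper, the rule is (a sketch; this helper does not exist in the patch):

    #include <cstddef>
    #include <string>

    #include <fmt/format.h>

    std::string DefaultInitializer(std::size_t element, std::size_t num_components) {
        if (num_components == 1) {
            return element == 3 ? "1.f" : "0.f";
        }
        if (element + num_components < 4) {
            return fmt::format("vec{}(0)", num_components); // slice never reaches .w
        }
        // The slice ends on the w component, which must default to 1.
        const char* zeros{num_components == 4 ? "0,0,0," : num_components == 3 ? "0,0," : "0,"};
        return fmt::format("vec{}({}1)", num_components, zeros);
    }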
+ +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLSL { +namespace { +std::string_view OutputVertexIndex(EmitContext& ctx) { + return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : ""; +} + +void InitializeOutputVaryings(EmitContext& ctx) { + if (ctx.uses_geometry_passthrough) { + return; + } + if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) { + ctx.Add("gl_Position=vec4(0,0,0,1);"); + } + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!ctx.info.stores.Generic(index)) { + continue; + } + const auto& info_array{ctx.output_generics.at(index)}; + const auto output_decorator{OutputVertexIndex(ctx)}; + size_t element{}; + while (element < info_array.size()) { + const auto& info{info_array.at(element)}; + const auto varying_name{fmt::format("{}{}", info.name, output_decorator)}; + switch (info.num_components) { + case 1: { + const char value{element == 3 ? '1' : '0'}; + ctx.Add("{}={}.f;", varying_name, value); + break; + } + case 2: + case 3: + if (element + info.num_components < 4) { + ctx.Add("{}=vec{}(0);", varying_name, info.num_components); + } else { + // last element is the w component, must be initialized to 1 + const auto zeros{info.num_components == 3 ? "0,0," : "0,"}; + ctx.Add("{}=vec{}({}1);", varying_name, info.num_components, zeros); + } + break; + case 4: + ctx.Add("{}=vec4(0,0,0,1);", varying_name); + break; + default: + break; + } + element += info.num_components; + } + } +} +} // Anonymous namespace + +void EmitPhi(EmitContext& ctx, IR::Inst& phi) { + const size_t num_args{phi.NumArgs()}; + for (size_t i = 0; i < num_args; ++i) { + ctx.var_alloc.Consume(phi.Arg(i)); + } + if (!phi.Definition<Id>().is_valid) { + // The phi node wasn't forward defined + ctx.var_alloc.PhiDefine(phi, phi.Arg(0).Type()); + } +} + +void EmitVoid(EmitContext&) {} + +void EmitReference(EmitContext& ctx, const IR::Value& value) { + ctx.var_alloc.Consume(value); +} + +void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& value) { + IR::Inst& phi{*phi_value.InstRecursive()}; + const auto phi_type{phi.Arg(0).Type()}; + if (!phi.Definition<Id>().is_valid) { + // The phi node wasn't forward defined + ctx.var_alloc.PhiDefine(phi, phi_type); + } + const auto phi_reg{ctx.var_alloc.Consume(IR::Value{&phi})}; + const auto val_reg{ctx.var_alloc.Consume(value)}; + if (phi_reg == val_reg) { + return; + } + ctx.Add("{}={};", phi_reg, val_reg); +} + +void EmitPrologue(EmitContext& ctx) { + InitializeOutputVaryings(ctx); +} + +void EmitEpilogue(EmitContext&) {} + +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { + ctx.Add("EmitStreamVertex(int({}));", ctx.var_alloc.Consume(stream)); + InitializeOutputVaryings(ctx); +} + +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { + ctx.Add("EndStreamPrimitive(int({}));", ctx.var_alloc.Consume(stream)); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp new file mode 100644 index 000000000..15bf02dd6 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp @@ -0,0 +1,32 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later 
version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" + +namespace Shader::Backend::GLSL { + +void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU1("{}=false;", inst); +} + +void EmitUndefU8(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=0u;", inst); +} + +void EmitUndefU16(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=0u;", inst); +} + +void EmitUndefU32(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=0u;", inst); +} + +void EmitUndefU64(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU64("{}=0u;", inst); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp new file mode 100644 index 000000000..a982dd8a2 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp @@ -0,0 +1,217 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLSL { +namespace { +void SetInBoundsFlag(EmitContext& ctx, IR::Inst& inst) { + IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; + if (!in_bounds) { + return; + } + ctx.AddU1("{}=shfl_in_bounds;", *in_bounds); + in_bounds->Invalidate(); +} + +std::string ComputeMinThreadId(std::string_view thread_id, std::string_view segmentation_mask) { + return fmt::format("({}&{})", thread_id, segmentation_mask); +} + +std::string ComputeMaxThreadId(std::string_view min_thread_id, std::string_view clamp, + std::string_view not_seg_mask) { + return fmt::format("({})|({}&{})", min_thread_id, clamp, not_seg_mask); +} + +std::string GetMaxThreadId(std::string_view thread_id, std::string_view clamp, + std::string_view segmentation_mask) { + const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)}; + const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)}; + return ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask); +} + +void UseShuffleNv(EmitContext& ctx, IR::Inst& inst, std::string_view shfl_op, + std::string_view value, std::string_view index, + [[maybe_unused]] std::string_view clamp, std::string_view segmentation_mask) { + const auto width{fmt::format("32u>>(bitCount({}&31u))", segmentation_mask)}; + ctx.AddU32("{}={}({},{},{},shfl_in_bounds);", inst, shfl_op, value, index, width); + SetInBoundsFlag(ctx, inst); +} +} // Anonymous namespace + +void EmitLaneId(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=gl_SubGroupInvocationARB&31u;", inst); +} + +void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); + } else { + const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")}; + const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)}; + ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask); + } +} + +void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + 
ctx.AddU1("{}=anyInvocationARB({});", inst, pred); + } else { + const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")}; + const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)}; + ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask); + } +} + +void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); + } else { + const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")}; + const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)}; + const auto value{fmt::format("({}^{})", ballot, active_mask)}; + ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask); + } +} + +void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred); + } else { + ctx.AddU32("{}=uvec2(ballotARB({}))[gl_SubGroupInvocationARB];", inst, pred); + } +} + +void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=uint(gl_SubGroupEqMaskARB.x);", inst); +} + +void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=uint(gl_SubGroupLtMaskARB.x);", inst); +} + +void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=uint(gl_SubGroupLeMaskARB.x);", inst); +} + +void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=uint(gl_SubGroupGtMaskARB.x);", inst); +} + +void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=uint(gl_SubGroupGeMaskARB.x);", inst); +} + +void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask) { + if (ctx.profile.support_gl_warp_intrinsics) { + UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, segmentation_mask); + return; + } + const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)}; + const auto thread_id{"gl_SubGroupInvocationARB"}; + const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)}; + const auto max_thread_id{ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask)}; + + const auto lhs{fmt::format("({}&{})", index, not_seg_mask)}; + const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)}; + ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); + SetInBoundsFlag(ctx, inst); + ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); +} + +void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, + std::string_view clamp, std::string_view segmentation_mask) { + if (ctx.profile.support_gl_warp_intrinsics) { + UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, segmentation_mask); + return; + } + const auto thread_id{"gl_SubGroupInvocationARB"}; + const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; + const auto src_thread_id{fmt::format("({}-{})", thread_id, index)}; + ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id); + SetInBoundsFlag(ctx, inst); + ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); +} + +void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, 
std::string_view clamp, + std::string_view segmentation_mask) { + if (ctx.profile.support_gl_warp_intrinsics) { + UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, segmentation_mask); + return; + } + const auto thread_id{"gl_SubGroupInvocationARB"}; + const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; + const auto src_thread_id{fmt::format("({}+{})", thread_id, index)}; + ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); + SetInBoundsFlag(ctx, inst); + ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); +} + +void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask) { + if (ctx.profile.support_gl_warp_intrinsics) { + UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, segmentation_mask); + return; + } + const auto thread_id{"gl_SubGroupInvocationARB"}; + const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; + const auto src_thread_id{fmt::format("({}^{})", thread_id, index)}; + ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); + SetInBoundsFlag(ctx, inst); + ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); +} + +void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, std::string_view op_a, std::string_view op_b, + std::string_view swizzle) { + const auto mask{fmt::format("({}>>((gl_SubGroupInvocationARB&3)<<1))&3", swizzle)}; + const std::string modifier_a = fmt::format("FSWZ_A[{}]", mask); + const std::string modifier_b = fmt::format("FSWZ_B[{}]", mask); + ctx.AddF32("{}=({}*{})+({}*{});", inst, op_a, modifier_a, op_b, modifier_b); +} + +void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { + if (ctx.profile.support_gl_derivative_control) { + ctx.AddF32("{}=dFdxFine({});", inst, op_a); + } else { + LOG_WARNING(Shader_GLSL, "Device does not support dFdxFine, fallback to dFdx"); + ctx.AddF32("{}=dFdx({});", inst, op_a); + } +} + +void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { + if (ctx.profile.support_gl_derivative_control) { + ctx.AddF32("{}=dFdyFine({});", inst, op_a); + } else { + LOG_WARNING(Shader_GLSL, "Device does not support dFdyFine, fallback to dFdy"); + ctx.AddF32("{}=dFdy({});", inst, op_a); + } +} + +void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { + if (ctx.profile.support_gl_derivative_control) { + ctx.AddF32("{}=dFdxCoarse({});", inst, op_a); + } else { + LOG_WARNING(Shader_GLSL, "Device does not support dFdxCoarse, fallback to dFdx"); + ctx.AddF32("{}=dFdx({});", inst, op_a); + } +} + +void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { + if (ctx.profile.support_gl_derivative_control) { + ctx.AddF32("{}=dFdyCoarse({});", inst, op_a); + } else { + LOG_WARNING(Shader_GLSL, "Device does not support dFdyCoarse, fallback to dFdy"); + ctx.AddF32("{}=dFdy({});", inst, op_a); + } +} +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/var_alloc.cpp b/src/shader_recompiler/backend/glsl/var_alloc.cpp new file mode 100644 index 000000000..194f926ca --- /dev/null +++ b/src/shader_recompiler/backend/glsl/var_alloc.cpp @@ -0,0 +1,308 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
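When shuffleNV and friends are unavailable, the shuffle emitters above rebuild the source lane by hand from the segmentation mask and clamp fields, then guard the readInvocationARB with the shfl_in_bounds flag. The lane arithmetic used by EmitShuffleIndex, pulled out into host code (struct and function names illustrative):

    #include <cstdint>

    struct ShuffleSource {
        std::uint32_t src_lane;
        bool in_bounds;
    };

    ShuffleSource ShuffleIndexSource(std::uint32_t lane, std::uint32_t index, std::uint32_t clamp,
                                     std::uint32_t seg_mask) {
        const std::uint32_t min_lane{lane & seg_mask};                // first lane of this segment
        const std::uint32_t max_lane{min_lane | (clamp & ~seg_mask)}; // last readable lane
        const std::uint32_t src_lane{(index & ~seg_mask) | min_lane}; // requested lane, kept in-segment
        const bool in_bounds{static_cast<std::int32_t>(src_lane) <=
                             static_cast<std::int32_t>(max_lane)};
        return {src_lane, in_bounds};
    }

EmitShuffleUp and EmitShuffleDown instead derive the source lane by subtracting or adding the index to the caller's own lane, checked against the same clamped bound.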
+ +#include <string> +#include <string_view> + +#include <fmt/format.h> + +#include "shader_recompiler/backend/glsl/var_alloc.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +namespace { +std::string TypePrefix(GlslVarType type) { + switch (type) { + case GlslVarType::U1: + return "b_"; + case GlslVarType::F16x2: + return "f16x2_"; + case GlslVarType::U32: + return "u_"; + case GlslVarType::F32: + return "f_"; + case GlslVarType::U64: + return "u64_"; + case GlslVarType::F64: + return "d_"; + case GlslVarType::U32x2: + return "u2_"; + case GlslVarType::F32x2: + return "f2_"; + case GlslVarType::U32x3: + return "u3_"; + case GlslVarType::F32x3: + return "f3_"; + case GlslVarType::U32x4: + return "u4_"; + case GlslVarType::F32x4: + return "f4_"; + case GlslVarType::PrecF32: + return "pf_"; + case GlslVarType::PrecF64: + return "pd_"; + case GlslVarType::Void: + return ""; + default: + throw NotImplementedException("Type {}", type); + } +} + +std::string FormatFloat(std::string_view value, IR::Type type) { + // TODO: Confirm FP64 nan/inf + if (type == IR::Type::F32) { + if (value == "nan") { + return "utof(0x7fc00000)"; + } + if (value == "inf") { + return "utof(0x7f800000)"; + } + if (value == "-inf") { + return "utof(0xff800000)"; + } + } + if (value.find_first_of('e') != std::string_view::npos) { + // scientific notation + const auto cast{type == IR::Type::F32 ? "float" : "double"}; + return fmt::format("{}({})", cast, value); + } + const bool needs_dot{value.find_first_of('.') == std::string_view::npos}; + const bool needs_suffix{!value.ends_with('f')}; + const auto suffix{type == IR::Type::F32 ? "f" : "lf"}; + return fmt::format("{}{}{}", value, needs_dot ? "." : "", needs_suffix ? suffix : ""); +} + +std::string MakeImm(const IR::Value& value) { + switch (value.Type()) { + case IR::Type::U1: + return fmt::format("{}", value.U1() ? 
"true" : "false"); + case IR::Type::U32: + return fmt::format("{}u", value.U32()); + case IR::Type::F32: + return FormatFloat(fmt::format("{}", value.F32()), IR::Type::F32); + case IR::Type::U64: + return fmt::format("{}ul", value.U64()); + case IR::Type::F64: + return FormatFloat(fmt::format("{}", value.F64()), IR::Type::F64); + case IR::Type::Void: + return ""; + default: + throw NotImplementedException("Immediate type {}", value.Type()); + } +} +} // Anonymous namespace + +std::string VarAlloc::Representation(u32 index, GlslVarType type) const { + const auto prefix{TypePrefix(type)}; + return fmt::format("{}{}", prefix, index); +} + +std::string VarAlloc::Representation(Id id) const { + return Representation(id.index, id.type); +} + +std::string VarAlloc::Define(IR::Inst& inst, GlslVarType type) { + if (inst.HasUses()) { + inst.SetDefinition<Id>(Alloc(type)); + return Representation(inst.Definition<Id>()); + } else { + Id id{}; + id.type.Assign(type); + GetUseTracker(type).uses_temp = true; + inst.SetDefinition<Id>(id); + return 't' + Representation(inst.Definition<Id>()); + } +} + +std::string VarAlloc::Define(IR::Inst& inst, IR::Type type) { + return Define(inst, RegType(type)); +} + +std::string VarAlloc::PhiDefine(IR::Inst& inst, IR::Type type) { + return AddDefine(inst, RegType(type)); +} + +std::string VarAlloc::AddDefine(IR::Inst& inst, GlslVarType type) { + if (inst.HasUses()) { + inst.SetDefinition<Id>(Alloc(type)); + return Representation(inst.Definition<Id>()); + } else { + return ""; + } + return Representation(inst.Definition<Id>()); +} + +std::string VarAlloc::Consume(const IR::Value& value) { + return value.IsImmediate() ? MakeImm(value) : ConsumeInst(*value.InstRecursive()); +} + +std::string VarAlloc::ConsumeInst(IR::Inst& inst) { + inst.DestructiveRemoveUsage(); + if (!inst.HasUses()) { + Free(inst.Definition<Id>()); + } + return Representation(inst.Definition<Id>()); +} + +std::string VarAlloc::GetGlslType(IR::Type type) const { + return GetGlslType(RegType(type)); +} + +Id VarAlloc::Alloc(GlslVarType type) { + auto& use_tracker{GetUseTracker(type)}; + const auto num_vars{use_tracker.var_use.size()}; + for (size_t var = 0; var < num_vars; ++var) { + if (use_tracker.var_use[var]) { + continue; + } + use_tracker.num_used = std::max(use_tracker.num_used, var + 1); + use_tracker.var_use[var] = true; + Id ret{}; + ret.is_valid.Assign(1); + ret.type.Assign(type); + ret.index.Assign(static_cast<u32>(var)); + return ret; + } + // Allocate a new variable + use_tracker.var_use.push_back(true); + Id ret{}; + ret.is_valid.Assign(1); + ret.type.Assign(type); + ret.index.Assign(static_cast<u32>(use_tracker.num_used)); + ++use_tracker.num_used; + return ret; +} + +void VarAlloc::Free(Id id) { + if (id.is_valid == 0) { + throw LogicError("Freeing invalid variable"); + } + auto& use_tracker{GetUseTracker(id.type)}; + use_tracker.var_use[id.index] = false; +} + +GlslVarType VarAlloc::RegType(IR::Type type) const { + switch (type) { + case IR::Type::U1: + return GlslVarType::U1; + case IR::Type::U32: + return GlslVarType::U32; + case IR::Type::F32: + return GlslVarType::F32; + case IR::Type::U64: + return GlslVarType::U64; + case IR::Type::F64: + return GlslVarType::F64; + default: + throw NotImplementedException("IR type {}", type); + } +} + +std::string VarAlloc::GetGlslType(GlslVarType type) const { + switch (type) { + case GlslVarType::U1: + return "bool"; + case GlslVarType::F16x2: + return "f16vec2"; + case GlslVarType::U32: + return "uint"; + case GlslVarType::F32: + case 
GlslVarType::PrecF32: + return "float"; + case GlslVarType::U64: + return "uint64_t"; + case GlslVarType::F64: + case GlslVarType::PrecF64: + return "double"; + case GlslVarType::U32x2: + return "uvec2"; + case GlslVarType::F32x2: + return "vec2"; + case GlslVarType::U32x3: + return "uvec3"; + case GlslVarType::F32x3: + return "vec3"; + case GlslVarType::U32x4: + return "uvec4"; + case GlslVarType::F32x4: + return "vec4"; + case GlslVarType::Void: + return ""; + default: + throw NotImplementedException("Type {}", type); + } +} + +VarAlloc::UseTracker& VarAlloc::GetUseTracker(GlslVarType type) { + switch (type) { + case GlslVarType::U1: + return var_bool; + case GlslVarType::F16x2: + return var_f16x2; + case GlslVarType::U32: + return var_u32; + case GlslVarType::F32: + return var_f32; + case GlslVarType::U64: + return var_u64; + case GlslVarType::F64: + return var_f64; + case GlslVarType::U32x2: + return var_u32x2; + case GlslVarType::F32x2: + return var_f32x2; + case GlslVarType::U32x3: + return var_u32x3; + case GlslVarType::F32x3: + return var_f32x3; + case GlslVarType::U32x4: + return var_u32x4; + case GlslVarType::F32x4: + return var_f32x4; + case GlslVarType::PrecF32: + return var_precf32; + case GlslVarType::PrecF64: + return var_precf64; + default: + throw NotImplementedException("Type {}", type); + } +} + +const VarAlloc::UseTracker& VarAlloc::GetUseTracker(GlslVarType type) const { + switch (type) { + case GlslVarType::U1: + return var_bool; + case GlslVarType::F16x2: + return var_f16x2; + case GlslVarType::U32: + return var_u32; + case GlslVarType::F32: + return var_f32; + case GlslVarType::U64: + return var_u64; + case GlslVarType::F64: + return var_f64; + case GlslVarType::U32x2: + return var_u32x2; + case GlslVarType::F32x2: + return var_f32x2; + case GlslVarType::U32x3: + return var_u32x3; + case GlslVarType::F32x3: + return var_f32x3; + case GlslVarType::U32x4: + return var_u32x4; + case GlslVarType::F32x4: + return var_f32x4; + case GlslVarType::PrecF32: + return var_precf32; + case GlslVarType::PrecF64: + return var_precf64; + default: + throw NotImplementedException("Type {}", type); + } +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/var_alloc.h b/src/shader_recompiler/backend/glsl/var_alloc.h new file mode 100644 index 000000000..8b49f32a6 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/var_alloc.h @@ -0,0 +1,105 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
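The magic constants in FormatFloat above are the IEEE-754 single-precision bit patterns for the values GLSL cannot spell as literals; the emitted utof() call reinterprets them inside the shader. They can be checked on the host (function name illustrative; requires C++20 for std::bit_cast):

    #include <bit>
    #include <cassert>
    #include <cmath>
    #include <cstdint>

    void CheckSpecialFloatBits() {
        assert(std::isnan(std::bit_cast<float>(0x7fc00000u)));  // quiet NaN
        assert(std::bit_cast<float>(0x7f800000u) == INFINITY);  // +infinity
        assert(std::bit_cast<float>(0xff800000u) == -INFINITY); // -infinity
    }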
+ +#pragma once + +#include <bitset> +#include <string> +#include <vector> + +#include "common/bit_field.h" +#include "common/common_types.h" + +namespace Shader::IR { +class Inst; +class Value; +enum class Type; +} // namespace Shader::IR + +namespace Shader::Backend::GLSL { +enum class GlslVarType : u32 { + U1, + F16x2, + U32, + F32, + U64, + F64, + U32x2, + F32x2, + U32x3, + F32x3, + U32x4, + F32x4, + PrecF32, + PrecF64, + Void, +}; + +struct Id { + union { + u32 raw; + BitField<0, 1, u32> is_valid; + BitField<1, 4, GlslVarType> type; + BitField<6, 26, u32> index; + }; + + bool operator==(Id rhs) const noexcept { + return raw == rhs.raw; + } + bool operator!=(Id rhs) const noexcept { + return !operator==(rhs); + } +}; +static_assert(sizeof(Id) == sizeof(u32)); + +class VarAlloc { +public: + struct UseTracker { + bool uses_temp{}; + size_t num_used{}; + std::vector<bool> var_use; + }; + + /// Used for explicit usages of variables, may revert to temporaries + std::string Define(IR::Inst& inst, GlslVarType type); + std::string Define(IR::Inst& inst, IR::Type type); + + /// Used to assign variables used by the IR. May return a blank string if + /// the instruction's result is unused in the IR. + std::string AddDefine(IR::Inst& inst, GlslVarType type); + std::string PhiDefine(IR::Inst& inst, IR::Type type); + + std::string Consume(const IR::Value& value); + std::string ConsumeInst(IR::Inst& inst); + + std::string GetGlslType(GlslVarType type) const; + std::string GetGlslType(IR::Type type) const; + + const UseTracker& GetUseTracker(GlslVarType type) const; + std::string Representation(u32 index, GlslVarType type) const; + +private: + GlslVarType RegType(IR::Type type) const; + Id Alloc(GlslVarType type); + void Free(Id id); + UseTracker& GetUseTracker(GlslVarType type); + std::string Representation(Id id) const; + + UseTracker var_bool{}; + UseTracker var_f16x2{}; + UseTracker var_u32{}; + UseTracker var_u32x2{}; + UseTracker var_u32x3{}; + UseTracker var_u32x4{}; + UseTracker var_f32{}; + UseTracker var_f32x2{}; + UseTracker var_f32x3{}; + UseTracker var_f32x4{}; + UseTracker var_u64{}; + UseTracker var_f64{}; + UseTracker var_precf32{}; + UseTracker var_precf64{}; +}; + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp new file mode 100644 index 000000000..2d29d8c14 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -0,0 +1,1368 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
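Id in the header above packs an allocation into a single 32-bit word via BitField: bit 0 holds is_valid, bits 1-4 the GlslVarType, and bits 6-31 the variable index (bit 5 is unused), all overlaying the raw u32 member that the static_assert checks. The same layout written with plain shifts and masks (function names illustrative):

    #include <cstdint>

    std::uint32_t PackId(bool is_valid, std::uint32_t type, std::uint32_t index) {
        return (is_valid ? 1u : 0u) | ((type & 0xfu) << 1) | ((index & 0x3ffffffu) << 6);
    }

    std::uint32_t UnpackIndex(std::uint32_t raw) {
        return (raw >> 6) & 0x3ffffffu; // 26-bit index field
    }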
+ +#include <algorithm> +#include <array> +#include <climits> +#include <string_view> + +#include <fmt/format.h> + +#include "common/common_types.h" +#include "common/div_ceil.h" +#include "shader_recompiler/backend/spirv/emit_context.h" + +namespace Shader::Backend::SPIRV { +namespace { +enum class Operation { + Increment, + Decrement, + FPAdd, + FPMin, + FPMax, +}; + +struct AttrInfo { + Id pointer; + Id id; + bool needs_cast; +}; + +Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { + const spv::ImageFormat format{spv::ImageFormat::Unknown}; + const Id type{ctx.F32[1]}; + const bool depth{desc.is_depth}; + switch (desc.type) { + case TextureType::Color1D: + return ctx.TypeImage(type, spv::Dim::Dim1D, depth, false, false, 1, format); + case TextureType::ColorArray1D: + return ctx.TypeImage(type, spv::Dim::Dim1D, depth, true, false, 1, format); + case TextureType::Color2D: + return ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, false, 1, format); + case TextureType::ColorArray2D: + return ctx.TypeImage(type, spv::Dim::Dim2D, depth, true, false, 1, format); + case TextureType::Color3D: + return ctx.TypeImage(type, spv::Dim::Dim3D, depth, false, false, 1, format); + case TextureType::ColorCube: + return ctx.TypeImage(type, spv::Dim::Cube, depth, false, false, 1, format); + case TextureType::ColorArrayCube: + return ctx.TypeImage(type, spv::Dim::Cube, depth, true, false, 1, format); + case TextureType::Buffer: + break; + } + throw InvalidArgument("Invalid texture type {}", desc.type); +} + +spv::ImageFormat GetImageFormat(ImageFormat format) { + switch (format) { + case ImageFormat::Typeless: + return spv::ImageFormat::Unknown; + case ImageFormat::R8_UINT: + return spv::ImageFormat::R8ui; + case ImageFormat::R8_SINT: + return spv::ImageFormat::R8i; + case ImageFormat::R16_UINT: + return spv::ImageFormat::R16ui; + case ImageFormat::R16_SINT: + return spv::ImageFormat::R16i; + case ImageFormat::R32_UINT: + return spv::ImageFormat::R32ui; + case ImageFormat::R32G32_UINT: + return spv::ImageFormat::Rg32ui; + case ImageFormat::R32G32B32A32_UINT: + return spv::ImageFormat::Rgba32ui; + } + throw InvalidArgument("Invalid image format {}", format); +} + +Id ImageType(EmitContext& ctx, const ImageDescriptor& desc) { + const spv::ImageFormat format{GetImageFormat(desc.format)}; + const Id type{ctx.U32[1]}; + switch (desc.type) { + case TextureType::Color1D: + return ctx.TypeImage(type, spv::Dim::Dim1D, false, false, false, 2, format); + case TextureType::ColorArray1D: + return ctx.TypeImage(type, spv::Dim::Dim1D, false, true, false, 2, format); + case TextureType::Color2D: + return ctx.TypeImage(type, spv::Dim::Dim2D, false, false, false, 2, format); + case TextureType::ColorArray2D: + return ctx.TypeImage(type, spv::Dim::Dim2D, false, true, false, 2, format); + case TextureType::Color3D: + return ctx.TypeImage(type, spv::Dim::Dim3D, false, false, false, 2, format); + case TextureType::Buffer: + throw NotImplementedException("Image buffer"); + default: + break; + } + throw InvalidArgument("Invalid texture type {}", desc.type); +} + +Id DefineVariable(EmitContext& ctx, Id type, std::optional<spv::BuiltIn> builtin, + spv::StorageClass storage_class) { + const Id pointer_type{ctx.TypePointer(storage_class, type)}; + const Id id{ctx.AddGlobalVariable(pointer_type, storage_class)}; + if (builtin) { + ctx.Decorate(id, spv::Decoration::BuiltIn, *builtin); + } + ctx.interfaces.push_back(id); + return id; +} + +u32 NumVertices(InputTopology input_topology) { + switch (input_topology) { + case 
InputTopology::Points: + return 1; + case InputTopology::Lines: + return 2; + case InputTopology::LinesAdjacency: + return 4; + case InputTopology::Triangles: + return 3; + case InputTopology::TrianglesAdjacency: + return 6; + } + throw InvalidArgument("Invalid input topology {}", input_topology); +} + +Id DefineInput(EmitContext& ctx, Id type, bool per_invocation, + std::optional<spv::BuiltIn> builtin = std::nullopt) { + switch (ctx.stage) { + case Stage::TessellationControl: + case Stage::TessellationEval: + if (per_invocation) { + type = ctx.TypeArray(type, ctx.Const(32u)); + } + break; + case Stage::Geometry: + if (per_invocation) { + const u32 num_vertices{NumVertices(ctx.runtime_info.input_topology)}; + type = ctx.TypeArray(type, ctx.Const(num_vertices)); + } + break; + default: + break; + } + return DefineVariable(ctx, type, builtin, spv::StorageClass::Input); +} + +Id DefineOutput(EmitContext& ctx, Id type, std::optional<u32> invocations, + std::optional<spv::BuiltIn> builtin = std::nullopt) { + if (invocations && ctx.stage == Stage::TessellationControl) { + type = ctx.TypeArray(type, ctx.Const(*invocations)); + } + return DefineVariable(ctx, type, builtin, spv::StorageClass::Output); +} + +void DefineGenericOutput(EmitContext& ctx, size_t index, std::optional<u32> invocations) { + static constexpr std::string_view swizzle{"xyzw"}; + const size_t base_attr_index{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4}; + u32 element{0}; + while (element < 4) { + const u32 remainder{4 - element}; + const TransformFeedbackVarying* xfb_varying{}; + if (!ctx.runtime_info.xfb_varyings.empty()) { + xfb_varying = &ctx.runtime_info.xfb_varyings[base_attr_index + element]; + xfb_varying = xfb_varying && xfb_varying->components > 0 ? xfb_varying : nullptr; + } + const u32 num_components{xfb_varying ? 
xfb_varying->components : remainder}; + + const Id id{DefineOutput(ctx, ctx.F32[num_components], invocations)}; + ctx.Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); + if (element > 0) { + ctx.Decorate(id, spv::Decoration::Component, element); + } + if (xfb_varying) { + ctx.Decorate(id, spv::Decoration::XfbBuffer, xfb_varying->buffer); + ctx.Decorate(id, spv::Decoration::XfbStride, xfb_varying->stride); + ctx.Decorate(id, spv::Decoration::Offset, xfb_varying->offset); + } + if (num_components < 4 || element > 0) { + const std::string_view subswizzle{swizzle.substr(element, num_components)}; + ctx.Name(id, fmt::format("out_attr{}_{}", index, subswizzle)); + } else { + ctx.Name(id, fmt::format("out_attr{}", index)); + } + const GenericElementInfo info{ + .id = id, + .first_element = element, + .num_components = num_components, + }; + std::fill_n(ctx.output_generics[index].begin() + element, num_components, info); + element += num_components; + } +} + +Id GetAttributeType(EmitContext& ctx, AttributeType type) { + switch (type) { + case AttributeType::Float: + return ctx.F32[4]; + case AttributeType::SignedInt: + return ctx.TypeVector(ctx.TypeInt(32, true), 4); + case AttributeType::UnsignedInt: + return ctx.U32[4]; + case AttributeType::Disabled: + break; + } + throw InvalidArgument("Invalid attribute type {}", type); +} + +std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) { + const AttributeType type{ctx.runtime_info.generic_input_types.at(index)}; + switch (type) { + case AttributeType::Float: + return AttrInfo{ctx.input_f32, ctx.F32[1], false}; + case AttributeType::UnsignedInt: + return AttrInfo{ctx.input_u32, ctx.U32[1], true}; + case AttributeType::SignedInt: + return AttrInfo{ctx.input_s32, ctx.TypeInt(32, true), true}; + case AttributeType::Disabled: + return std::nullopt; + } + throw InvalidArgument("Invalid attribute type {}", type); +} + +std::string_view StageName(Stage stage) { + switch (stage) { + case Stage::VertexA: + return "vs_a"; + case Stage::VertexB: + return "vs"; + case Stage::TessellationControl: + return "tcs"; + case Stage::TessellationEval: + return "tes"; + case Stage::Geometry: + return "gs"; + case Stage::Fragment: + return "fs"; + case Stage::Compute: + return "cs"; + } + throw InvalidArgument("Invalid stage {}", stage); +} + +template <typename... Args> +void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... args) { + ctx.Name(object, fmt::format(fmt::runtime(format_str), StageName(ctx.stage), + std::forward<Args>(args)...) 
+ .c_str()); +} + +void DefineConstBuffers(EmitContext& ctx, const Info& info, Id UniformDefinitions::*member_type, + u32 binding, Id type, char type_char, u32 element_size) { + const Id array_type{ctx.TypeArray(type, ctx.Const(65536U / element_size))}; + ctx.Decorate(array_type, spv::Decoration::ArrayStride, element_size); + + const Id struct_type{ctx.TypeStruct(array_type)}; + Name(ctx, struct_type, "{}_cbuf_block_{}{}", ctx.stage, type_char, element_size * CHAR_BIT); + ctx.Decorate(struct_type, spv::Decoration::Block); + ctx.MemberName(struct_type, 0, "data"); + ctx.MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); + + const Id struct_pointer_type{ctx.TypePointer(spv::StorageClass::Uniform, struct_type)}; + const Id uniform_type{ctx.TypePointer(spv::StorageClass::Uniform, type)}; + ctx.uniform_types.*member_type = uniform_type; + + for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { + const Id id{ctx.AddGlobalVariable(struct_pointer_type, spv::StorageClass::Uniform)}; + ctx.Decorate(id, spv::Decoration::Binding, binding); + ctx.Decorate(id, spv::Decoration::DescriptorSet, 0U); + ctx.Name(id, fmt::format("c{}", desc.index)); + for (size_t i = 0; i < desc.count; ++i) { + ctx.cbufs[desc.index + i].*member_type = id; + } + if (ctx.profile.supported_spirv >= 0x00010400) { + ctx.interfaces.push_back(id); + } + binding += desc.count; + } +} + +void DefineSsbos(EmitContext& ctx, StorageTypeDefinition& type_def, + Id StorageDefinitions::*member_type, const Info& info, u32 binding, Id type, + u32 stride) { + const Id array_type{ctx.TypeRuntimeArray(type)}; + ctx.Decorate(array_type, spv::Decoration::ArrayStride, stride); + + const Id struct_type{ctx.TypeStruct(array_type)}; + ctx.Decorate(struct_type, spv::Decoration::Block); + ctx.MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); + + const Id struct_pointer{ctx.TypePointer(spv::StorageClass::StorageBuffer, struct_type)}; + type_def.array = struct_pointer; + type_def.element = ctx.TypePointer(spv::StorageClass::StorageBuffer, type); + + u32 index{}; + for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { + const Id id{ctx.AddGlobalVariable(struct_pointer, spv::StorageClass::StorageBuffer)}; + ctx.Decorate(id, spv::Decoration::Binding, binding); + ctx.Decorate(id, spv::Decoration::DescriptorSet, 0U); + ctx.Name(id, fmt::format("ssbo{}", index)); + if (ctx.profile.supported_spirv >= 0x00010400) { + ctx.interfaces.push_back(id); + } + for (size_t i = 0; i < desc.count; ++i) { + ctx.ssbos[index + i].*member_type = id; + } + index += desc.count; + binding += desc.count; + } +} + +Id CasFunction(EmitContext& ctx, Operation operation, Id value_type) { + const Id func_type{ctx.TypeFunction(value_type, value_type, value_type)}; + const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)}; + const Id op_a{ctx.OpFunctionParameter(value_type)}; + const Id op_b{ctx.OpFunctionParameter(value_type)}; + ctx.AddLabel(); + Id result{}; + switch (operation) { + case Operation::Increment: { + const Id pred{ctx.OpUGreaterThanEqual(ctx.U1, op_a, op_b)}; + const Id incr{ctx.OpIAdd(value_type, op_a, ctx.Constant(value_type, 1))}; + result = ctx.OpSelect(value_type, pred, ctx.u32_zero_value, incr); + break; + } + case Operation::Decrement: { + const Id lhs{ctx.OpIEqual(ctx.U1, op_a, ctx.Constant(value_type, 0u))}; + const Id rhs{ctx.OpUGreaterThan(ctx.U1, op_a, op_b)}; + const Id pred{ctx.OpLogicalOr(ctx.U1, lhs, rhs)}; + const Id decr{ctx.OpISub(value_type, op_a, 
ctx.Constant(value_type, 1))}; + result = ctx.OpSelect(value_type, pred, op_b, decr); + break; + } + case Operation::FPAdd: + result = ctx.OpFAdd(value_type, op_a, op_b); + break; + case Operation::FPMin: + result = ctx.OpFMin(value_type, op_a, op_b); + break; + case Operation::FPMax: + result = ctx.OpFMax(value_type, op_a, op_b); + break; + default: + break; + } + ctx.OpReturnValue(result); + ctx.OpFunctionEnd(); + return func; +} + +Id CasLoop(EmitContext& ctx, Operation operation, Id array_pointer, Id element_pointer, + Id value_type, Id memory_type, spv::Scope scope) { + const bool is_shared{scope == spv::Scope::Workgroup}; + const bool is_struct{!is_shared || ctx.profile.support_explicit_workgroup_layout}; + const Id cas_func{CasFunction(ctx, operation, value_type)}; + const Id zero{ctx.u32_zero_value}; + const Id scope_id{ctx.Const(static_cast<u32>(scope))}; + + const Id loop_header{ctx.OpLabel()}; + const Id continue_block{ctx.OpLabel()}; + const Id merge_block{ctx.OpLabel()}; + const Id func_type{is_shared + ? ctx.TypeFunction(value_type, ctx.U32[1], value_type) + : ctx.TypeFunction(value_type, ctx.U32[1], value_type, array_pointer)}; + + const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)}; + const Id index{ctx.OpFunctionParameter(ctx.U32[1])}; + const Id op_b{ctx.OpFunctionParameter(value_type)}; + const Id base{is_shared ? ctx.shared_memory_u32 : ctx.OpFunctionParameter(array_pointer)}; + ctx.AddLabel(); + ctx.OpBranch(loop_header); + ctx.AddLabel(loop_header); + + ctx.OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone); + ctx.OpBranch(continue_block); + + ctx.AddLabel(continue_block); + const Id word_pointer{is_struct ? ctx.OpAccessChain(element_pointer, base, zero, index) + : ctx.OpAccessChain(element_pointer, base, index)}; + if (value_type.value == ctx.F32[2].value) { + const Id u32_value{ctx.OpLoad(ctx.U32[1], word_pointer)}; + const Id value{ctx.OpUnpackHalf2x16(ctx.F32[2], u32_value)}; + const Id new_value{ctx.OpFunctionCall(value_type, cas_func, value, op_b)}; + const Id u32_new_value{ctx.OpPackHalf2x16(ctx.U32[1], new_value)}; + const Id atomic_res{ctx.OpAtomicCompareExchange(ctx.U32[1], word_pointer, scope_id, zero, + zero, u32_new_value, u32_value)}; + const Id success{ctx.OpIEqual(ctx.U1, atomic_res, u32_value)}; + ctx.OpBranchConditional(success, merge_block, loop_header); + + ctx.AddLabel(merge_block); + ctx.OpReturnValue(ctx.OpUnpackHalf2x16(ctx.F32[2], atomic_res)); + } else { + const Id value{ctx.OpLoad(memory_type, word_pointer)}; + const bool matching_type{value_type.value == memory_type.value}; + const Id bitcast_value{matching_type ? value : ctx.OpBitcast(value_type, value)}; + const Id cal_res{ctx.OpFunctionCall(value_type, cas_func, bitcast_value, op_b)}; + const Id new_value{matching_type ? 
cal_res : ctx.OpBitcast(memory_type, cal_res)}; + const Id atomic_res{ctx.OpAtomicCompareExchange(ctx.U32[1], word_pointer, scope_id, zero, + zero, new_value, value)}; + const Id success{ctx.OpIEqual(ctx.U1, atomic_res, value)}; + ctx.OpBranchConditional(success, merge_block, loop_header); + + ctx.AddLabel(merge_block); + ctx.OpReturnValue(ctx.OpBitcast(value_type, atomic_res)); + } + ctx.OpFunctionEnd(); + return func; +} + +template <typename Desc> +std::string NameOf(Stage stage, const Desc& desc, std::string_view prefix) { + if (desc.count > 1) { + return fmt::format("{}_{}{}_{:02x}x{}", StageName(stage), prefix, desc.cbuf_index, + desc.cbuf_offset, desc.count); + } else { + return fmt::format("{}_{}{}_{:02x}", StageName(stage), prefix, desc.cbuf_index, + desc.cbuf_offset); + } +} + +Id DescType(EmitContext& ctx, Id sampled_type, Id pointer_type, u32 count) { + if (count > 1) { + const Id array_type{ctx.TypeArray(sampled_type, ctx.Const(count))}; + return ctx.TypePointer(spv::StorageClass::UniformConstant, array_type); + } else { + return pointer_type; + } +} +} // Anonymous namespace + +void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { + defs[0] = sirit_ctx.Name(base_type, name); + + std::array<char, 6> def_name; + for (int i = 1; i < 4; ++i) { + const std::string_view def_name_view( + def_name.data(), + fmt::format_to_n(def_name.data(), def_name.size(), "{}x{}", name, i + 1).size); + defs[static_cast<size_t>(i)] = + sirit_ctx.Name(sirit_ctx.TypeVector(base_type, i + 1), def_name_view); + } +} + +EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_, + IR::Program& program, Bindings& bindings) + : Sirit::Module(profile_.supported_spirv), profile{profile_}, + runtime_info{runtime_info_}, stage{program.stage} { + const bool is_unified{profile.unified_descriptor_binding}; + u32& uniform_binding{is_unified ? bindings.unified : bindings.uniform_buffer}; + u32& storage_binding{is_unified ? bindings.unified : bindings.storage_buffer}; + u32& texture_binding{is_unified ? bindings.unified : bindings.texture}; + u32& image_binding{is_unified ? bindings.unified : bindings.image}; + AddCapability(spv::Capability::Shader); + DefineCommonTypes(program.info); + DefineCommonConstants(); + DefineInterfaces(program); + DefineLocalMemory(program); + DefineSharedMemory(program); + DefineSharedMemoryFunctions(program); + DefineConstantBuffers(program.info, uniform_binding); + DefineStorageBuffers(program.info, storage_binding); + DefineTextureBuffers(program.info, texture_binding); + DefineImageBuffers(program.info, image_binding); + DefineTextures(program.info, texture_binding); + DefineImages(program.info, image_binding); + DefineAttributeMemAccess(program.info); + DefineGlobalMemoryFunctions(program.info); +} + +EmitContext::~EmitContext() = default; + +Id EmitContext::Def(const IR::Value& value) { + if (!value.IsImmediate()) { + return value.InstRecursive()->Definition<Id>(); + } + switch (value.Type()) { + case IR::Type::Void: + // Void instructions are used for optional arguments (e.g. texture offsets) + // They are not meant to be used in the SPIR-V module + return Id{}; + case IR::Type::U1: + return value.U1() ? 
true_value : false_value; + case IR::Type::U32: + return Const(value.U32()); + case IR::Type::U64: + return Constant(U64, value.U64()); + case IR::Type::F32: + return Const(value.F32()); + case IR::Type::F64: + return Constant(F64[1], value.F64()); + default: + throw NotImplementedException("Immediate type {}", value.Type()); + } +} + +Id EmitContext::BitOffset8(const IR::Value& offset) { + if (offset.IsImmediate()) { + return Const((offset.U32() % 4) * 8); + } + return OpBitwiseAnd(U32[1], OpShiftLeftLogical(U32[1], Def(offset), Const(3u)), Const(24u)); +} + +Id EmitContext::BitOffset16(const IR::Value& offset) { + if (offset.IsImmediate()) { + return Const(((offset.U32() / 2) % 2) * 16); + } + return OpBitwiseAnd(U32[1], OpShiftLeftLogical(U32[1], Def(offset), Const(3u)), Const(16u)); +} + +void EmitContext::DefineCommonTypes(const Info& info) { + void_id = TypeVoid(); + + U1 = Name(TypeBool(), "u1"); + + F32.Define(*this, TypeFloat(32), "f32"); + U32.Define(*this, TypeInt(32, false), "u32"); + S32.Define(*this, TypeInt(32, true), "s32"); + + private_u32 = Name(TypePointer(spv::StorageClass::Private, U32[1]), "private_u32"); + + input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32"); + input_u32 = Name(TypePointer(spv::StorageClass::Input, U32[1]), "input_u32"); + input_s32 = Name(TypePointer(spv::StorageClass::Input, TypeInt(32, true)), "input_s32"); + + output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32"); + output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32"); + + if (info.uses_int8 && profile.support_int8) { + AddCapability(spv::Capability::Int8); + U8 = Name(TypeInt(8, false), "u8"); + S8 = Name(TypeInt(8, true), "s8"); + } + if (info.uses_int16 && profile.support_int16) { + AddCapability(spv::Capability::Int16); + U16 = Name(TypeInt(16, false), "u16"); + S16 = Name(TypeInt(16, true), "s16"); + } + if (info.uses_int64) { + AddCapability(spv::Capability::Int64); + U64 = Name(TypeInt(64, false), "u64"); + } + if (info.uses_fp16) { + AddCapability(spv::Capability::Float16); + F16.Define(*this, TypeFloat(16), "f16"); + } + if (info.uses_fp64) { + AddCapability(spv::Capability::Float64); + F64.Define(*this, TypeFloat(64), "f64"); + } +} + +void EmitContext::DefineCommonConstants() { + true_value = ConstantTrue(U1); + false_value = ConstantFalse(U1); + u32_zero_value = Const(0U); + f32_zero_value = Const(0.0f); +} + +void EmitContext::DefineInterfaces(const IR::Program& program) { + DefineInputs(program); + DefineOutputs(program); +} + +void EmitContext::DefineLocalMemory(const IR::Program& program) { + if (program.local_memory_size == 0) { + return; + } + const u32 num_elements{Common::DivCeil(program.local_memory_size, 4U)}; + const Id type{TypeArray(U32[1], Const(num_elements))}; + const Id pointer{TypePointer(spv::StorageClass::Private, type)}; + local_memory = AddGlobalVariable(pointer, spv::StorageClass::Private); + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(local_memory); + } +} + +void EmitContext::DefineSharedMemory(const IR::Program& program) { + if (program.shared_memory_size == 0) { + return; + } + const auto make{[&](Id element_type, u32 element_size) { + const u32 num_elements{Common::DivCeil(program.shared_memory_size, element_size)}; + const Id array_type{TypeArray(element_type, Const(num_elements))}; + Decorate(array_type, spv::Decoration::ArrayStride, element_size); + + const Id struct_type{TypeStruct(array_type)}; + MemberDecorate(struct_type, 0U, 
spv::Decoration::Offset, 0U); + Decorate(struct_type, spv::Decoration::Block); + + const Id pointer{TypePointer(spv::StorageClass::Workgroup, struct_type)}; + const Id element_pointer{TypePointer(spv::StorageClass::Workgroup, element_type)}; + const Id variable{AddGlobalVariable(pointer, spv::StorageClass::Workgroup)}; + Decorate(variable, spv::Decoration::Aliased); + interfaces.push_back(variable); + + return std::make_tuple(variable, element_pointer, pointer); + }}; + if (profile.support_explicit_workgroup_layout) { + AddExtension("SPV_KHR_workgroup_memory_explicit_layout"); + AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR); + if (program.info.uses_int8) { + AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR); + std::tie(shared_memory_u8, shared_u8, std::ignore) = make(U8, 1); + } + if (program.info.uses_int16) { + AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR); + std::tie(shared_memory_u16, shared_u16, std::ignore) = make(U16, 2); + } + if (program.info.uses_int64) { + std::tie(shared_memory_u64, shared_u64, std::ignore) = make(U64, 8); + } + std::tie(shared_memory_u32, shared_u32, shared_memory_u32_type) = make(U32[1], 4); + std::tie(shared_memory_u32x2, shared_u32x2, std::ignore) = make(U32[2], 8); + std::tie(shared_memory_u32x4, shared_u32x4, std::ignore) = make(U32[4], 16); + return; + } + const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)}; + const Id type{TypeArray(U32[1], Const(num_elements))}; + shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type); + + shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]); + shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup); + interfaces.push_back(shared_memory_u32); + + const Id func_type{TypeFunction(void_id, U32[1], U32[1])}; + const auto make_function{[&](u32 mask, u32 size) { + const Id loop_header{OpLabel()}; + const Id continue_block{OpLabel()}; + const Id merge_block{OpLabel()}; + + const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type)}; + const Id offset{OpFunctionParameter(U32[1])}; + const Id insert_value{OpFunctionParameter(U32[1])}; + AddLabel(); + OpBranch(loop_header); + + AddLabel(loop_header); + const Id word_offset{OpShiftRightArithmetic(U32[1], offset, Const(2U))}; + const Id shift_offset{OpShiftLeftLogical(U32[1], offset, Const(3U))}; + const Id bit_offset{OpBitwiseAnd(U32[1], shift_offset, Const(mask))}; + const Id count{Const(size)}; + OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone); + OpBranch(continue_block); + + AddLabel(continue_block); + const Id word_pointer{OpAccessChain(shared_u32, shared_memory_u32, word_offset)}; + const Id old_value{OpLoad(U32[1], word_pointer)}; + const Id new_value{OpBitFieldInsert(U32[1], old_value, insert_value, bit_offset, count)}; + const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, Const(1U), u32_zero_value, + u32_zero_value, new_value, old_value)}; + const Id success{OpIEqual(U1, atomic_res, old_value)}; + OpBranchConditional(success, merge_block, loop_header); + + AddLabel(merge_block); + OpReturn(); + OpFunctionEnd(); + return func; + }}; + if (program.info.uses_int8) { + shared_store_u8_func = make_function(24, 8); + } + if (program.info.uses_int16) { + shared_store_u16_func = make_function(16, 16); + } +} + +void EmitContext::DefineSharedMemoryFunctions(const IR::Program& program) { + if (program.info.uses_shared_increment) { + increment_cas_shared = 
CasLoop(*this, Operation::Increment, shared_memory_u32_type, + shared_u32, U32[1], U32[1], spv::Scope::Workgroup); + } + if (program.info.uses_shared_decrement) { + decrement_cas_shared = CasLoop(*this, Operation::Decrement, shared_memory_u32_type, + shared_u32, U32[1], U32[1], spv::Scope::Workgroup); + } +} + +void EmitContext::DefineAttributeMemAccess(const Info& info) { + const auto make_load{[&] { + const bool is_array{stage == Stage::Geometry}; + const Id end_block{OpLabel()}; + const Id default_label{OpLabel()}; + + const Id func_type_load{is_array ? TypeFunction(F32[1], U32[1], U32[1]) + : TypeFunction(F32[1], U32[1])}; + const Id func{OpFunction(F32[1], spv::FunctionControlMask::MaskNone, func_type_load)}; + const Id offset{OpFunctionParameter(U32[1])}; + const Id vertex{is_array ? OpFunctionParameter(U32[1]) : Id{}}; + + AddLabel(); + const Id base_index{OpShiftRightArithmetic(U32[1], offset, Const(2U))}; + const Id masked_index{OpBitwiseAnd(U32[1], base_index, Const(3U))}; + const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))}; + std::vector<Sirit::Literal> literals; + std::vector<Id> labels; + if (info.loads.AnyComponent(IR::Attribute::PositionX)) { + literals.push_back(static_cast<u32>(IR::Attribute::PositionX) >> 2); + labels.push_back(OpLabel()); + } + const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2; + for (u32 index = 0; index < static_cast<u32>(IR::NUM_GENERICS); ++index) { + if (!info.loads.Generic(index)) { + continue; + } + literals.push_back(base_attribute_value + index); + labels.push_back(OpLabel()); + } + OpSelectionMerge(end_block, spv::SelectionControlMask::MaskNone); + OpSwitch(compare_index, default_label, literals, labels); + AddLabel(default_label); + OpReturnValue(Const(0.0f)); + size_t label_index{0}; + if (info.loads.AnyComponent(IR::Attribute::PositionX)) { + AddLabel(labels[label_index]); + const Id pointer{is_array + ? OpAccessChain(input_f32, input_position, vertex, masked_index) + : OpAccessChain(input_f32, input_position, masked_index)}; + const Id result{OpLoad(F32[1], pointer)}; + OpReturnValue(result); + ++label_index; + } + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.loads.Generic(index)) { + continue; + } + AddLabel(labels[label_index]); + const auto type{AttrTypes(*this, static_cast<u32>(index))}; + if (!type) { + OpReturnValue(Const(0.0f)); + ++label_index; + continue; + } + const Id generic_id{input_generics.at(index)}; + const Id pointer{is_array + ? OpAccessChain(type->pointer, generic_id, vertex, masked_index) + : OpAccessChain(type->pointer, generic_id, masked_index)}; + const Id value{OpLoad(type->id, pointer)}; + const Id result{type->needs_cast ? 
OpBitcast(F32[1], value) : value}; + OpReturnValue(result); + ++label_index; + } + AddLabel(end_block); + OpUnreachable(); + OpFunctionEnd(); + return func; + }}; + const auto make_store{[&] { + const Id end_block{OpLabel()}; + const Id default_label{OpLabel()}; + + const Id func_type_store{TypeFunction(void_id, U32[1], F32[1])}; + const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type_store)}; + const Id offset{OpFunctionParameter(U32[1])}; + const Id store_value{OpFunctionParameter(F32[1])}; + AddLabel(); + const Id base_index{OpShiftRightArithmetic(U32[1], offset, Const(2U))}; + const Id masked_index{OpBitwiseAnd(U32[1], base_index, Const(3U))}; + const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))}; + std::vector<Sirit::Literal> literals; + std::vector<Id> labels; + if (info.stores.AnyComponent(IR::Attribute::PositionX)) { + literals.push_back(static_cast<u32>(IR::Attribute::PositionX) >> 2); + labels.push_back(OpLabel()); + } + const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2; + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.stores.Generic(index)) { + continue; + } + literals.push_back(base_attribute_value + static_cast<u32>(index)); + labels.push_back(OpLabel()); + } + if (info.stores.ClipDistances()) { + literals.push_back(static_cast<u32>(IR::Attribute::ClipDistance0) >> 2); + labels.push_back(OpLabel()); + literals.push_back(static_cast<u32>(IR::Attribute::ClipDistance4) >> 2); + labels.push_back(OpLabel()); + } + OpSelectionMerge(end_block, spv::SelectionControlMask::MaskNone); + OpSwitch(compare_index, default_label, literals, labels); + AddLabel(default_label); + OpReturn(); + size_t label_index{0}; + if (info.stores.AnyComponent(IR::Attribute::PositionX)) { + AddLabel(labels[label_index]); + const Id pointer{OpAccessChain(output_f32, output_position, masked_index)}; + OpStore(pointer, store_value); + OpReturn(); + ++label_index; + } + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.stores.Generic(index)) { + continue; + } + if (output_generics[index][0].num_components != 4) { + throw NotImplementedException("Physical stores and transform feedbacks"); + } + AddLabel(labels[label_index]); + const Id generic_id{output_generics[index][0].id}; + const Id pointer{OpAccessChain(output_f32, generic_id, masked_index)}; + OpStore(pointer, store_value); + OpReturn(); + ++label_index; + } + if (info.stores.ClipDistances()) { + AddLabel(labels[label_index]); + const Id pointer{OpAccessChain(output_f32, clip_distances, masked_index)}; + OpStore(pointer, store_value); + OpReturn(); + ++label_index; + AddLabel(labels[label_index]); + const Id fixed_index{OpIAdd(U32[1], masked_index, Const(4U))}; + const Id pointer2{OpAccessChain(output_f32, clip_distances, fixed_index)}; + OpStore(pointer2, store_value); + OpReturn(); + ++label_index; + } + AddLabel(end_block); + OpUnreachable(); + OpFunctionEnd(); + return func; + }}; + if (info.loads_indexed_attributes) { + indexed_load_func = make_load(); + } + if (info.stores_indexed_attributes) { + indexed_store_func = make_store(); + } +} + +void EmitContext::DefineGlobalMemoryFunctions(const Info& info) { + if (!info.uses_global_memory || !profile.support_int64) { + return; + } + using DefPtr = Id StorageDefinitions::*; + const Id zero{u32_zero_value}; + const auto define_body{[&](DefPtr ssbo_member, Id addr, Id element_pointer, u32 shift, + auto&& callback) { + AddLabel(); + const size_t 
num_buffers{info.storage_buffers_descriptors.size()}; + for (size_t index = 0; index < num_buffers; ++index) { + if (!info.nvn_buffer_used[index]) { + continue; + } + const auto& ssbo{info.storage_buffers_descriptors[index]}; + const Id ssbo_addr_cbuf_offset{Const(ssbo.cbuf_offset / 8)}; + const Id ssbo_size_cbuf_offset{Const(ssbo.cbuf_offset / 4 + 2)}; + const Id ssbo_addr_pointer{OpAccessChain( + uniform_types.U32x2, cbufs[ssbo.cbuf_index].U32x2, zero, ssbo_addr_cbuf_offset)}; + const Id ssbo_size_pointer{OpAccessChain(uniform_types.U32, cbufs[ssbo.cbuf_index].U32, + zero, ssbo_size_cbuf_offset)}; + + const Id ssbo_addr{OpBitcast(U64, OpLoad(U32[2], ssbo_addr_pointer))}; + const Id ssbo_size{OpUConvert(U64, OpLoad(U32[1], ssbo_size_pointer))}; + const Id ssbo_end{OpIAdd(U64, ssbo_addr, ssbo_size)}; + const Id cond{OpLogicalAnd(U1, OpUGreaterThanEqual(U1, addr, ssbo_addr), + OpULessThan(U1, addr, ssbo_end))}; + const Id then_label{OpLabel()}; + const Id else_label{OpLabel()}; + OpSelectionMerge(else_label, spv::SelectionControlMask::MaskNone); + OpBranchConditional(cond, then_label, else_label); + AddLabel(then_label); + const Id ssbo_id{ssbos[index].*ssbo_member}; + const Id ssbo_offset{OpUConvert(U32[1], OpISub(U64, addr, ssbo_addr))}; + const Id ssbo_index{OpShiftRightLogical(U32[1], ssbo_offset, Const(shift))}; + const Id ssbo_pointer{OpAccessChain(element_pointer, ssbo_id, zero, ssbo_index)}; + callback(ssbo_pointer); + AddLabel(else_label); + } + }}; + const auto define_load{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) { + const Id function_type{TypeFunction(type, U64)}; + const Id func_id{OpFunction(type, spv::FunctionControlMask::MaskNone, function_type)}; + const Id addr{OpFunctionParameter(U64)}; + define_body(ssbo_member, addr, element_pointer, shift, + [&](Id ssbo_pointer) { OpReturnValue(OpLoad(type, ssbo_pointer)); }); + OpReturnValue(ConstantNull(type)); + OpFunctionEnd(); + return func_id; + }}; + const auto define_write{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) { + const Id function_type{TypeFunction(void_id, U64, type)}; + const Id func_id{OpFunction(void_id, spv::FunctionControlMask::MaskNone, function_type)}; + const Id addr{OpFunctionParameter(U64)}; + const Id data{OpFunctionParameter(type)}; + define_body(ssbo_member, addr, element_pointer, shift, [&](Id ssbo_pointer) { + OpStore(ssbo_pointer, data); + OpReturn(); + }); + OpReturn(); + OpFunctionEnd(); + return func_id; + }}; + const auto define{ + [&](DefPtr ssbo_member, const StorageTypeDefinition& type_def, Id type, size_t size) { + const Id element_type{type_def.element}; + const u32 shift{static_cast<u32>(std::countr_zero(size))}; + const Id load_func{define_load(ssbo_member, element_type, type, shift)}; + const Id write_func{define_write(ssbo_member, element_type, type, shift)}; + return std::make_pair(load_func, write_func); + }}; + std::tie(load_global_func_u32, write_global_func_u32) = + define(&StorageDefinitions::U32, storage_types.U32, U32[1], sizeof(u32)); + std::tie(load_global_func_u32x2, write_global_func_u32x2) = + define(&StorageDefinitions::U32x2, storage_types.U32x2, U32[2], sizeof(u32[2])); + std::tie(load_global_func_u32x4, write_global_func_u32x4) = + define(&StorageDefinitions::U32x4, storage_types.U32x4, U32[4], sizeof(u32[4])); +} + +void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { + if (info.constant_buffer_descriptors.empty()) { + return; + } + if (!profile.support_descriptor_aliasing) { + DefineConstBuffers(*this, info, 
&UniformDefinitions::U32x4, binding, U32[4], 'u', + sizeof(u32[4])); + for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { + binding += desc.count; + } + return; + } + IR::Type types{info.used_constant_buffer_types}; + if (True(types & IR::Type::U8)) { + if (profile.support_int8) { + DefineConstBuffers(*this, info, &UniformDefinitions::U8, binding, U8, 'u', sizeof(u8)); + DefineConstBuffers(*this, info, &UniformDefinitions::S8, binding, S8, 's', sizeof(s8)); + } else { + types |= IR::Type::U32; + } + } + if (True(types & IR::Type::U16)) { + if (profile.support_int16) { + DefineConstBuffers(*this, info, &UniformDefinitions::U16, binding, U16, 'u', + sizeof(u16)); + DefineConstBuffers(*this, info, &UniformDefinitions::S16, binding, S16, 's', + sizeof(s16)); + } else { + types |= IR::Type::U32; + } + } + if (True(types & IR::Type::U32)) { + DefineConstBuffers(*this, info, &UniformDefinitions::U32, binding, U32[1], 'u', + sizeof(u32)); + } + if (True(types & IR::Type::F32)) { + DefineConstBuffers(*this, info, &UniformDefinitions::F32, binding, F32[1], 'f', + sizeof(f32)); + } + if (True(types & IR::Type::U32x2)) { + DefineConstBuffers(*this, info, &UniformDefinitions::U32x2, binding, U32[2], 'u', + sizeof(u32[2])); + } + binding += static_cast<u32>(info.constant_buffer_descriptors.size()); +} + +void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { + if (info.storage_buffers_descriptors.empty()) { + return; + } + AddExtension("SPV_KHR_storage_buffer_storage_class"); + + const IR::Type used_types{profile.support_descriptor_aliasing ? info.used_storage_buffer_types + : IR::Type::U32}; + if (profile.support_int8 && True(used_types & IR::Type::U8)) { + DefineSsbos(*this, storage_types.U8, &StorageDefinitions::U8, info, binding, U8, + sizeof(u8)); + DefineSsbos(*this, storage_types.S8, &StorageDefinitions::S8, info, binding, S8, + sizeof(u8)); + } + if (profile.support_int16 && True(used_types & IR::Type::U16)) { + DefineSsbos(*this, storage_types.U16, &StorageDefinitions::U16, info, binding, U16, + sizeof(u16)); + DefineSsbos(*this, storage_types.S16, &StorageDefinitions::S16, info, binding, S16, + sizeof(u16)); + } + if (True(used_types & IR::Type::U32)) { + DefineSsbos(*this, storage_types.U32, &StorageDefinitions::U32, info, binding, U32[1], + sizeof(u32)); + } + if (True(used_types & IR::Type::F32)) { + DefineSsbos(*this, storage_types.F32, &StorageDefinitions::F32, info, binding, F32[1], + sizeof(f32)); + } + if (True(used_types & IR::Type::U64)) { + DefineSsbos(*this, storage_types.U64, &StorageDefinitions::U64, info, binding, U64, + sizeof(u64)); + } + if (True(used_types & IR::Type::U32x2)) { + DefineSsbos(*this, storage_types.U32x2, &StorageDefinitions::U32x2, info, binding, U32[2], + sizeof(u32[2])); + } + if (True(used_types & IR::Type::U32x4)) { + DefineSsbos(*this, storage_types.U32x4, &StorageDefinitions::U32x4, info, binding, U32[4], + sizeof(u32[4])); + } + for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { + binding += desc.count; + } + const bool needs_function{ + info.uses_global_increment || info.uses_global_decrement || info.uses_atomic_f32_add || + info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max || + info.uses_atomic_f32x2_add || info.uses_atomic_f32x2_min || info.uses_atomic_f32x2_max}; + if (needs_function) { + AddCapability(spv::Capability::VariablePointersStorageBuffer); + } + if (info.uses_global_increment) { + increment_cas_ssbo = CasLoop(*this, 
Operation::Increment, storage_types.U32.array, + storage_types.U32.element, U32[1], U32[1], spv::Scope::Device); + } + if (info.uses_global_decrement) { + decrement_cas_ssbo = CasLoop(*this, Operation::Decrement, storage_types.U32.array, + storage_types.U32.element, U32[1], U32[1], spv::Scope::Device); + } + if (info.uses_atomic_f32_add) { + f32_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array, + storage_types.U32.element, F32[1], U32[1], spv::Scope::Device); + } + if (info.uses_atomic_f16x2_add) { + f16x2_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array, + storage_types.U32.element, F16[2], F16[2], spv::Scope::Device); + } + if (info.uses_atomic_f16x2_min) { + f16x2_min_cas = CasLoop(*this, Operation::FPMin, storage_types.U32.array, + storage_types.U32.element, F16[2], F16[2], spv::Scope::Device); + } + if (info.uses_atomic_f16x2_max) { + f16x2_max_cas = CasLoop(*this, Operation::FPMax, storage_types.U32.array, + storage_types.U32.element, F16[2], F16[2], spv::Scope::Device); + } + if (info.uses_atomic_f32x2_add) { + f32x2_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array, + storage_types.U32.element, F32[2], F32[2], spv::Scope::Device); + } + if (info.uses_atomic_f32x2_min) { + f32x2_min_cas = CasLoop(*this, Operation::FPMin, storage_types.U32.array, + storage_types.U32.element, F32[2], F32[2], spv::Scope::Device); + } + if (info.uses_atomic_f32x2_max) { + f32x2_max_cas = CasLoop(*this, Operation::FPMax, storage_types.U32.array, + storage_types.U32.element, F32[2], F32[2], spv::Scope::Device); + } +} + +void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) { + if (info.texture_buffer_descriptors.empty()) { + return; + } + const spv::ImageFormat format{spv::ImageFormat::Unknown}; + image_buffer_type = TypeImage(F32[1], spv::Dim::Buffer, 0U, false, false, 1, format); + sampled_texture_buffer_type = TypeSampledImage(image_buffer_type); + + const Id type{TypePointer(spv::StorageClass::UniformConstant, sampled_texture_buffer_type)}; + texture_buffers.reserve(info.texture_buffer_descriptors.size()); + for (const TextureBufferDescriptor& desc : info.texture_buffer_descriptors) { + if (desc.count != 1) { + throw NotImplementedException("Array of texture buffers"); + } + const Id id{AddGlobalVariable(type, spv::StorageClass::UniformConstant)}; + Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); + Name(id, NameOf(stage, desc, "texbuf")); + texture_buffers.push_back({ + .id = id, + .count = desc.count, + }); + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(id); + } + ++binding; + } +} + +void EmitContext::DefineImageBuffers(const Info& info, u32& binding) { + image_buffers.reserve(info.image_buffer_descriptors.size()); + for (const ImageBufferDescriptor& desc : info.image_buffer_descriptors) { + if (desc.count != 1) { + throw NotImplementedException("Array of image buffers"); + } + const spv::ImageFormat format{GetImageFormat(desc.format)}; + const Id image_type{TypeImage(U32[1], spv::Dim::Buffer, false, false, false, 2, format)}; + const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)}; + const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; + Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); + Name(id, NameOf(stage, desc, "imgbuf")); + image_buffers.push_back({ + .id = id, + .image_type = image_type, + .count = desc.count, + }); + if 
(profile.supported_spirv >= 0x00010400) { + interfaces.push_back(id); + } + ++binding; + } +} + +void EmitContext::DefineTextures(const Info& info, u32& binding) { + textures.reserve(info.texture_descriptors.size()); + for (const TextureDescriptor& desc : info.texture_descriptors) { + const Id image_type{ImageType(*this, desc)}; + const Id sampled_type{TypeSampledImage(image_type)}; + const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, sampled_type)}; + const Id desc_type{DescType(*this, sampled_type, pointer_type, desc.count)}; + const Id id{AddGlobalVariable(desc_type, spv::StorageClass::UniformConstant)}; + Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); + Name(id, NameOf(stage, desc, "tex")); + textures.push_back({ + .id = id, + .sampled_type = sampled_type, + .pointer_type = pointer_type, + .image_type = image_type, + .count = desc.count, + }); + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(id); + } + ++binding; + } + if (info.uses_atomic_image_u32) { + image_u32 = TypePointer(spv::StorageClass::Image, U32[1]); + } +} + +void EmitContext::DefineImages(const Info& info, u32& binding) { + images.reserve(info.image_descriptors.size()); + for (const ImageDescriptor& desc : info.image_descriptors) { + if (desc.count != 1) { + throw NotImplementedException("Array of images"); + } + const Id image_type{ImageType(*this, desc)}; + const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)}; + const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; + Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); + Name(id, NameOf(stage, desc, "img")); + images.push_back({ + .id = id, + .image_type = image_type, + .count = desc.count, + }); + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(id); + } + ++binding; + } +} + +void EmitContext::DefineInputs(const IR::Program& program) { + const Info& info{program.info}; + const VaryingState loads{info.loads.mask | info.passthrough.mask}; + + if (info.uses_workgroup_id) { + workgroup_id = DefineInput(*this, U32[3], false, spv::BuiltIn::WorkgroupId); + } + if (info.uses_local_invocation_id) { + local_invocation_id = DefineInput(*this, U32[3], false, spv::BuiltIn::LocalInvocationId); + } + if (info.uses_invocation_id) { + invocation_id = DefineInput(*this, U32[1], false, spv::BuiltIn::InvocationId); + } + if (info.uses_sample_id) { + sample_id = DefineInput(*this, U32[1], false, spv::BuiltIn::SampleId); + } + if (info.uses_is_helper_invocation) { + is_helper_invocation = DefineInput(*this, U1, false, spv::BuiltIn::HelperInvocation); + } + if (info.uses_subgroup_mask) { + subgroup_mask_eq = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupEqMaskKHR); + subgroup_mask_lt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLtMaskKHR); + subgroup_mask_le = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLeMaskKHR); + subgroup_mask_gt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGtMaskKHR); + subgroup_mask_ge = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGeMaskKHR); + } + if (info.uses_subgroup_invocation_id || info.uses_subgroup_shuffles || + (profile.warp_size_potentially_larger_than_guest && + (info.uses_subgroup_vote || info.uses_subgroup_mask))) { + subgroup_local_invocation_id = + DefineInput(*this, U32[1], false, spv::BuiltIn::SubgroupLocalInvocationId); + } + if (info.uses_fswzadd) { + const Id 
f32_one{Const(1.0f)}; + const Id f32_minus_one{Const(-1.0f)}; + const Id f32_zero{Const(0.0f)}; + fswzadd_lut_a = ConstantComposite(F32[4], f32_minus_one, f32_one, f32_minus_one, f32_zero); + fswzadd_lut_b = + ConstantComposite(F32[4], f32_minus_one, f32_minus_one, f32_one, f32_minus_one); + } + if (loads[IR::Attribute::PrimitiveId]) { + primitive_id = DefineInput(*this, U32[1], false, spv::BuiltIn::PrimitiveId); + } + if (loads.AnyComponent(IR::Attribute::PositionX)) { + const bool is_fragment{stage != Stage::Fragment}; + const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; + input_position = DefineInput(*this, F32[4], true, built_in); + if (profile.support_geometry_shader_passthrough) { + if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) { + Decorate(input_position, spv::Decoration::PassthroughNV); + } + } + } + if (loads[IR::Attribute::InstanceId]) { + if (profile.support_vertex_instance_id) { + instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId); + } else { + instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex); + base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance); + } + } + if (loads[IR::Attribute::VertexId]) { + if (profile.support_vertex_instance_id) { + vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId); + } else { + vertex_index = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexIndex); + base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); + } + } + if (loads[IR::Attribute::FrontFace]) { + front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing); + } + if (loads[IR::Attribute::PointSpriteS] || loads[IR::Attribute::PointSpriteT]) { + point_coord = DefineInput(*this, F32[2], true, spv::BuiltIn::PointCoord); + } + if (loads[IR::Attribute::TessellationEvaluationPointU] || + loads[IR::Attribute::TessellationEvaluationPointV]) { + tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord); + } + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const AttributeType input_type{runtime_info.generic_input_types[index]}; + if (!runtime_info.previous_stage_stores.Generic(index)) { + continue; + } + if (!loads.Generic(index)) { + continue; + } + if (input_type == AttributeType::Disabled) { + continue; + } + const Id type{GetAttributeType(*this, input_type)}; + const Id id{DefineInput(*this, type, true)}; + Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); + Name(id, fmt::format("in_attr{}", index)); + input_generics[index] = id; + + if (info.passthrough.Generic(index) && profile.support_geometry_shader_passthrough) { + Decorate(id, spv::Decoration::PassthroughNV); + } + if (stage != Stage::Fragment) { + continue; + } + switch (info.interpolation[index]) { + case Interpolation::Smooth: + // Default + // Decorate(id, spv::Decoration::Smooth); + break; + case Interpolation::NoPerspective: + Decorate(id, spv::Decoration::NoPerspective); + break; + case Interpolation::Flat: + Decorate(id, spv::Decoration::Flat); + break; + } + } + if (stage == Stage::TessellationEval) { + for (size_t index = 0; index < info.uses_patches.size(); ++index) { + if (!info.uses_patches[index]) { + continue; + } + const Id id{DefineInput(*this, F32[4], false)}; + Decorate(id, spv::Decoration::Patch); + Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); + patches[index] = id; + } + } +} + +void EmitContext::DefineOutputs(const IR::Program& program) { + const Info& info{program.info}; + const 
std::optional<u32> invocations{program.invocations}; + if (info.stores.AnyComponent(IR::Attribute::PositionX) || stage == Stage::VertexB) { + output_position = DefineOutput(*this, F32[4], invocations, spv::BuiltIn::Position); + } + if (info.stores[IR::Attribute::PointSize] || runtime_info.fixed_state_point_size) { + if (stage == Stage::Fragment) { + throw NotImplementedException("Storing PointSize in fragment stage"); + } + output_point_size = DefineOutput(*this, F32[1], invocations, spv::BuiltIn::PointSize); + } + if (info.stores.ClipDistances()) { + if (stage == Stage::Fragment) { + throw NotImplementedException("Storing ClipDistance in fragment stage"); + } + const Id type{TypeArray(F32[1], Const(8U))}; + clip_distances = DefineOutput(*this, type, invocations, spv::BuiltIn::ClipDistance); + } + if (info.stores[IR::Attribute::Layer] && + (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) { + if (stage == Stage::Fragment) { + throw NotImplementedException("Storing Layer in fragment stage"); + } + layer = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::Layer); + } + if (info.stores[IR::Attribute::ViewportIndex] && + (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) { + if (stage == Stage::Fragment) { + throw NotImplementedException("Storing ViewportIndex in fragment stage"); + } + viewport_index = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::ViewportIndex); + } + if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { + viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt, + spv::BuiltIn::ViewportMaskNV); + } + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.stores.Generic(index)) { + DefineGenericOutput(*this, index, invocations); + } + } + switch (stage) { + case Stage::TessellationControl: + if (info.stores_tess_level_outer) { + const Id type{TypeArray(F32[1], Const(4U))}; + output_tess_level_outer = + DefineOutput(*this, type, std::nullopt, spv::BuiltIn::TessLevelOuter); + Decorate(output_tess_level_outer, spv::Decoration::Patch); + } + if (info.stores_tess_level_inner) { + const Id type{TypeArray(F32[1], Const(2U))}; + output_tess_level_inner = + DefineOutput(*this, type, std::nullopt, spv::BuiltIn::TessLevelInner); + Decorate(output_tess_level_inner, spv::Decoration::Patch); + } + for (size_t index = 0; index < info.uses_patches.size(); ++index) { + if (!info.uses_patches[index]) { + continue; + } + const Id id{DefineOutput(*this, F32[4], std::nullopt)}; + Decorate(id, spv::Decoration::Patch); + Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); + patches[index] = id; + } + break; + case Stage::Fragment: + for (u32 index = 0; index < 8; ++index) { + if (!info.stores_frag_color[index] && !profile.need_declared_frag_colors) { + continue; + } + frag_color[index] = DefineOutput(*this, F32[4], std::nullopt); + Decorate(frag_color[index], spv::Decoration::Location, index); + Name(frag_color[index], fmt::format("frag_color{}", index)); + } + if (info.stores_frag_depth) { + frag_depth = DefineOutput(*this, F32[1], std::nullopt); + Decorate(frag_depth, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth); + } + if (info.stores_sample_mask) { + sample_mask = DefineOutput(*this, U32[1], std::nullopt); + Decorate(sample_mask, spv::Decoration::BuiltIn, spv::BuiltIn::SampleMask); + } + break; + default: + break; + } +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_context.h 
b/src/shader_recompiler/backend/spirv/emit_context.h new file mode 100644 index 000000000..e277bc358 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -0,0 +1,307 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <string_view> + +#include <sirit/sirit.h> + +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" +#include "shader_recompiler/shader_info.h" + +namespace Shader::Backend::SPIRV { + +using Sirit::Id; + +class VectorTypes { +public: + void Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name); + + [[nodiscard]] Id operator[](size_t size) const noexcept { + return defs[size - 1]; + } + +private: + std::array<Id, 4> defs{}; +}; + +struct TextureDefinition { + Id id; + Id sampled_type; + Id pointer_type; + Id image_type; + u32 count; +}; + +struct TextureBufferDefinition { + Id id; + u32 count; +}; + +struct ImageBufferDefinition { + Id id; + Id image_type; + u32 count; +}; + +struct ImageDefinition { + Id id; + Id image_type; + u32 count; +}; + +struct UniformDefinitions { + Id U8{}; + Id S8{}; + Id U16{}; + Id S16{}; + Id U32{}; + Id F32{}; + Id U32x2{}; + Id U32x4{}; +}; + +struct StorageTypeDefinition { + Id array{}; + Id element{}; +}; + +struct StorageTypeDefinitions { + StorageTypeDefinition U8{}; + StorageTypeDefinition S8{}; + StorageTypeDefinition U16{}; + StorageTypeDefinition S16{}; + StorageTypeDefinition U32{}; + StorageTypeDefinition U64{}; + StorageTypeDefinition F32{}; + StorageTypeDefinition U32x2{}; + StorageTypeDefinition U32x4{}; +}; + +struct StorageDefinitions { + Id U8{}; + Id S8{}; + Id U16{}; + Id S16{}; + Id U32{}; + Id F32{}; + Id U64{}; + Id U32x2{}; + Id U32x4{}; +}; + +struct GenericElementInfo { + Id id{}; + u32 first_element{}; + u32 num_components{}; +}; + +class EmitContext final : public Sirit::Module { +public: + explicit EmitContext(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program, Bindings& binding); + ~EmitContext(); + + [[nodiscard]] Id Def(const IR::Value& value); + + [[nodiscard]] Id BitOffset8(const IR::Value& offset); + [[nodiscard]] Id BitOffset16(const IR::Value& offset); + + Id Const(u32 value) { + return Constant(U32[1], value); + } + + Id Const(u32 element_1, u32 element_2) { + return ConstantComposite(U32[2], Const(element_1), Const(element_2)); + } + + Id Const(u32 element_1, u32 element_2, u32 element_3) { + return ConstantComposite(U32[3], Const(element_1), Const(element_2), Const(element_3)); + } + + Id Const(u32 element_1, u32 element_2, u32 element_3, u32 element_4) { + return ConstantComposite(U32[4], Const(element_1), Const(element_2), Const(element_3), + Const(element_4)); + } + + Id SConst(s32 value) { + return Constant(S32[1], value); + } + + Id SConst(s32 element_1, s32 element_2) { + return ConstantComposite(S32[2], SConst(element_1), SConst(element_2)); + } + + Id SConst(s32 element_1, s32 element_2, s32 element_3) { + return ConstantComposite(S32[3], SConst(element_1), SConst(element_2), SConst(element_3)); + } + + Id SConst(s32 element_1, s32 element_2, s32 element_3, s32 element_4) { + return ConstantComposite(S32[4], SConst(element_1), SConst(element_2), SConst(element_3), + SConst(element_4)); + } + + Id Const(f32 value) { + return Constant(F32[1], value); + } + + const Profile& profile; + 
const RuntimeInfo& runtime_info; + Stage stage{}; + + Id void_id{}; + Id U1{}; + Id U8{}; + Id S8{}; + Id U16{}; + Id S16{}; + Id U64{}; + VectorTypes F32; + VectorTypes U32; + VectorTypes S32; + VectorTypes F16; + VectorTypes F64; + + Id true_value{}; + Id false_value{}; + Id u32_zero_value{}; + Id f32_zero_value{}; + + UniformDefinitions uniform_types; + StorageTypeDefinitions storage_types; + + Id private_u32{}; + + Id shared_u8{}; + Id shared_u16{}; + Id shared_u32{}; + Id shared_u64{}; + Id shared_u32x2{}; + Id shared_u32x4{}; + + Id input_f32{}; + Id input_u32{}; + Id input_s32{}; + + Id output_f32{}; + Id output_u32{}; + + Id image_buffer_type{}; + Id sampled_texture_buffer_type{}; + Id image_u32{}; + + std::array<UniformDefinitions, Info::MAX_CBUFS> cbufs{}; + std::array<StorageDefinitions, Info::MAX_SSBOS> ssbos{}; + std::vector<TextureBufferDefinition> texture_buffers; + std::vector<ImageBufferDefinition> image_buffers; + std::vector<TextureDefinition> textures; + std::vector<ImageDefinition> images; + + Id workgroup_id{}; + Id local_invocation_id{}; + Id invocation_id{}; + Id sample_id{}; + Id is_helper_invocation{}; + Id subgroup_local_invocation_id{}; + Id subgroup_mask_eq{}; + Id subgroup_mask_lt{}; + Id subgroup_mask_le{}; + Id subgroup_mask_gt{}; + Id subgroup_mask_ge{}; + Id instance_id{}; + Id instance_index{}; + Id base_instance{}; + Id vertex_id{}; + Id vertex_index{}; + Id base_vertex{}; + Id front_face{}; + Id point_coord{}; + Id tess_coord{}; + Id clip_distances{}; + Id layer{}; + Id viewport_index{}; + Id viewport_mask{}; + Id primitive_id{}; + + Id fswzadd_lut_a{}; + Id fswzadd_lut_b{}; + + Id indexed_load_func{}; + Id indexed_store_func{}; + + Id local_memory{}; + + Id shared_memory_u8{}; + Id shared_memory_u16{}; + Id shared_memory_u32{}; + Id shared_memory_u64{}; + Id shared_memory_u32x2{}; + Id shared_memory_u32x4{}; + + Id shared_memory_u32_type{}; + + Id shared_store_u8_func{}; + Id shared_store_u16_func{}; + Id increment_cas_shared{}; + Id increment_cas_ssbo{}; + Id decrement_cas_shared{}; + Id decrement_cas_ssbo{}; + Id f32_add_cas{}; + Id f16x2_add_cas{}; + Id f16x2_min_cas{}; + Id f16x2_max_cas{}; + Id f32x2_add_cas{}; + Id f32x2_min_cas{}; + Id f32x2_max_cas{}; + + Id load_global_func_u32{}; + Id load_global_func_u32x2{}; + Id load_global_func_u32x4{}; + Id write_global_func_u32{}; + Id write_global_func_u32x2{}; + Id write_global_func_u32x4{}; + + Id input_position{}; + std::array<Id, 32> input_generics{}; + + Id output_point_size{}; + Id output_position{}; + std::array<std::array<GenericElementInfo, 4>, 32> output_generics{}; + + Id output_tess_level_outer{}; + Id output_tess_level_inner{}; + std::array<Id, 30> patches{}; + + std::array<Id, 8> frag_color{}; + Id sample_mask{}; + Id frag_depth{}; + + std::vector<Id> interfaces; + +private: + void DefineCommonTypes(const Info& info); + void DefineCommonConstants(); + void DefineInterfaces(const IR::Program& program); + void DefineLocalMemory(const IR::Program& program); + void DefineSharedMemory(const IR::Program& program); + void DefineSharedMemoryFunctions(const IR::Program& program); + void DefineConstantBuffers(const Info& info, u32& binding); + void DefineStorageBuffers(const Info& info, u32& binding); + void DefineTextureBuffers(const Info& info, u32& binding); + void DefineImageBuffers(const Info& info, u32& binding); + void DefineTextures(const Info& info, u32& binding); + void DefineImages(const Info& info, u32& binding); + void DefineAttributeMemAccess(const Info& info); + void 
DefineGlobalMemoryFunctions(const Info& info); + + void DefineInputs(const IR::Program& program); + void DefineOutputs(const IR::Program& program); +}; + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp new file mode 100644 index 000000000..d7a86e270 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -0,0 +1,541 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <span> +#include <tuple> +#include <type_traits> +#include <utility> +#include <vector> + +#include "common/settings.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/program.h" + +namespace Shader::Backend::SPIRV { +namespace { +template <class Func> +struct FuncTraits {}; + +template <class ReturnType_, class... Args> +struct FuncTraits<ReturnType_ (*)(Args...)> { + using ReturnType = ReturnType_; + + static constexpr size_t NUM_ARGS = sizeof...(Args); + + template <size_t I> + using ArgType = std::tuple_element_t<I, std::tuple<Args...>>; +}; + +template <auto func, typename... Args> +void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) { + inst->SetDefinition<Id>(func(ctx, std::forward<Args>(args)...)); +} + +template <typename ArgType> +ArgType Arg(EmitContext& ctx, const IR::Value& arg) { + if constexpr (std::is_same_v<ArgType, Id>) { + return ctx.Def(arg); + } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) { + return arg; + } else if constexpr (std::is_same_v<ArgType, u32>) { + return arg.U32(); + } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) { + return arg.Attribute(); + } else if constexpr (std::is_same_v<ArgType, IR::Patch>) { + return arg.Patch(); + } else if constexpr (std::is_same_v<ArgType, IR::Reg>) { + return arg.Reg(); + } +} + +template <auto func, bool is_first_arg_inst, size_t... I> +void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) { + using Traits = FuncTraits<decltype(func)>; + if constexpr (std::is_same_v<typename Traits::ReturnType, Id>) { + if constexpr (is_first_arg_inst) { + SetDefinition<func>( + ctx, inst, inst, + Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...); + } else { + SetDefinition<func>( + ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...); + } + } else { + if constexpr (is_first_arg_inst) { + func(ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...); + } else { + func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...); + } + } +} + +template <auto func> +void Invoke(EmitContext& ctx, IR::Inst* inst) { + using Traits = FuncTraits<decltype(func)>; + static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments"); + if constexpr (Traits::NUM_ARGS == 1) { + Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{}); + } else { + using FirstArgType = typename Traits::template ArgType<1>; + static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst*>; + using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 
2 : 1)>; + Invoke<func, is_first_arg_inst>(ctx, inst, Indices{}); + } +} + +void EmitInst(EmitContext& ctx, IR::Inst* inst) { + switch (inst->GetOpcode()) { +#define OPCODE(name, result_type, ...) \ + case IR::Opcode::name: \ + return Invoke<&Emit##name>(ctx, inst); +#include "shader_recompiler/frontend/ir/opcodes.inc" +#undef OPCODE + } + throw LogicError("Invalid opcode {}", inst->GetOpcode()); +} + +Id TypeId(const EmitContext& ctx, IR::Type type) { + switch (type) { + case IR::Type::U1: + return ctx.U1; + case IR::Type::U32: + return ctx.U32[1]; + default: + throw NotImplementedException("Phi node type {}", type); + } +} + +void Traverse(EmitContext& ctx, IR::Program& program) { + IR::Block* current_block{}; + for (const IR::AbstractSyntaxNode& node : program.syntax_list) { + switch (node.type) { + case IR::AbstractSyntaxNode::Type::Block: { + const Id label{node.data.block->Definition<Id>()}; + if (current_block) { + ctx.OpBranch(label); + } + current_block = node.data.block; + ctx.AddLabel(label); + for (IR::Inst& inst : node.data.block->Instructions()) { + EmitInst(ctx, &inst); + } + break; + } + case IR::AbstractSyntaxNode::Type::If: { + const Id if_label{node.data.if_node.body->Definition<Id>()}; + const Id endif_label{node.data.if_node.merge->Definition<Id>()}; + ctx.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone); + ctx.OpBranchConditional(ctx.Def(node.data.if_node.cond), if_label, endif_label); + break; + } + case IR::AbstractSyntaxNode::Type::Loop: { + const Id body_label{node.data.loop.body->Definition<Id>()}; + const Id continue_label{node.data.loop.continue_block->Definition<Id>()}; + const Id endloop_label{node.data.loop.merge->Definition<Id>()}; + + ctx.OpLoopMerge(endloop_label, continue_label, spv::LoopControlMask::MaskNone); + ctx.OpBranch(body_label); + break; + } + case IR::AbstractSyntaxNode::Type::Break: { + const Id break_label{node.data.break_node.merge->Definition<Id>()}; + const Id skip_label{node.data.break_node.skip->Definition<Id>()}; + ctx.OpBranchConditional(ctx.Def(node.data.break_node.cond), break_label, skip_label); + break; + } + case IR::AbstractSyntaxNode::Type::EndIf: + if (current_block) { + ctx.OpBranch(node.data.end_if.merge->Definition<Id>()); + } + break; + case IR::AbstractSyntaxNode::Type::Repeat: { + Id cond{ctx.Def(node.data.repeat.cond)}; + if (!Settings::values.disable_shader_loop_safety_checks) { + const Id pointer_type{ctx.TypePointer(spv::StorageClass::Private, ctx.U32[1])}; + const Id safety_counter{ctx.AddGlobalVariable( + pointer_type, spv::StorageClass::Private, ctx.Const(0x2000u))}; + if (ctx.profile.supported_spirv >= 0x00010400) { + ctx.interfaces.push_back(safety_counter); + } + const Id old_counter{ctx.OpLoad(ctx.U32[1], safety_counter)}; + const Id new_counter{ctx.OpISub(ctx.U32[1], old_counter, ctx.Const(1u))}; + ctx.OpStore(safety_counter, new_counter); + + const Id safety_cond{ + ctx.OpSGreaterThanEqual(ctx.U1, new_counter, ctx.u32_zero_value)}; + cond = ctx.OpLogicalAnd(ctx.U1, cond, safety_cond); + } + const Id loop_header_label{node.data.repeat.loop_header->Definition<Id>()}; + const Id merge_label{node.data.repeat.merge->Definition<Id>()}; + ctx.OpBranchConditional(cond, loop_header_label, merge_label); + break; + } + case IR::AbstractSyntaxNode::Type::Return: + ctx.OpReturn(); + break; + case IR::AbstractSyntaxNode::Type::Unreachable: + ctx.OpUnreachable(); + break; + } + if (node.type != IR::AbstractSyntaxNode::Type::Block) { + current_block = nullptr; + } + } +} + +Id DefineMain(EmitContext& ctx, 
IR::Program& program) { + const Id void_function{ctx.TypeFunction(ctx.void_id)}; + const Id main{ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function)}; + for (IR::Block* const block : program.blocks) { + block->SetDefinition(ctx.OpLabel()); + } + Traverse(ctx, program); + ctx.OpFunctionEnd(); + return main; +} + +spv::ExecutionMode ExecutionMode(TessPrimitive primitive) { + switch (primitive) { + case TessPrimitive::Isolines: + return spv::ExecutionMode::Isolines; + case TessPrimitive::Triangles: + return spv::ExecutionMode::Triangles; + case TessPrimitive::Quads: + return spv::ExecutionMode::Quads; + } + throw InvalidArgument("Tessellation primitive {}", primitive); +} + +spv::ExecutionMode ExecutionMode(TessSpacing spacing) { + switch (spacing) { + case TessSpacing::Equal: + return spv::ExecutionMode::SpacingEqual; + case TessSpacing::FractionalOdd: + return spv::ExecutionMode::SpacingFractionalOdd; + case TessSpacing::FractionalEven: + return spv::ExecutionMode::SpacingFractionalEven; + } + throw InvalidArgument("Tessellation spacing {}", spacing); +} + +void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { + const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); + spv::ExecutionModel execution_model{}; + switch (program.stage) { + case Stage::Compute: { + const std::array<u32, 3> workgroup_size{program.workgroup_size}; + execution_model = spv::ExecutionModel::GLCompute; + ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0], + workgroup_size[1], workgroup_size[2]); + break; + } + case Stage::VertexB: + execution_model = spv::ExecutionModel::Vertex; + break; + case Stage::TessellationControl: + execution_model = spv::ExecutionModel::TessellationControl; + ctx.AddCapability(spv::Capability::Tessellation); + ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.invocations); + break; + case Stage::TessellationEval: + execution_model = spv::ExecutionModel::TessellationEvaluation; + ctx.AddCapability(spv::Capability::Tessellation); + ctx.AddExecutionMode(main, ExecutionMode(ctx.runtime_info.tess_primitive)); + ctx.AddExecutionMode(main, ExecutionMode(ctx.runtime_info.tess_spacing)); + ctx.AddExecutionMode(main, ctx.runtime_info.tess_clockwise + ? 
spv::ExecutionMode::VertexOrderCw + : spv::ExecutionMode::VertexOrderCcw); + break; + case Stage::Geometry: + execution_model = spv::ExecutionModel::Geometry; + ctx.AddCapability(spv::Capability::Geometry); + ctx.AddCapability(spv::Capability::GeometryStreams); + switch (ctx.runtime_info.input_topology) { + case InputTopology::Points: + ctx.AddExecutionMode(main, spv::ExecutionMode::InputPoints); + break; + case InputTopology::Lines: + ctx.AddExecutionMode(main, spv::ExecutionMode::InputLines); + break; + case InputTopology::LinesAdjacency: + ctx.AddExecutionMode(main, spv::ExecutionMode::InputLinesAdjacency); + break; + case InputTopology::Triangles: + ctx.AddExecutionMode(main, spv::ExecutionMode::Triangles); + break; + case InputTopology::TrianglesAdjacency: + ctx.AddExecutionMode(main, spv::ExecutionMode::InputTrianglesAdjacency); + break; + } + switch (program.output_topology) { + case OutputTopology::PointList: + ctx.AddExecutionMode(main, spv::ExecutionMode::OutputPoints); + break; + case OutputTopology::LineStrip: + ctx.AddExecutionMode(main, spv::ExecutionMode::OutputLineStrip); + break; + case OutputTopology::TriangleStrip: + ctx.AddExecutionMode(main, spv::ExecutionMode::OutputTriangleStrip); + break; + } + if (program.info.stores[IR::Attribute::PointSize]) { + ctx.AddCapability(spv::Capability::GeometryPointSize); + } + ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.output_vertices); + ctx.AddExecutionMode(main, spv::ExecutionMode::Invocations, program.invocations); + if (program.is_geometry_passthrough) { + if (ctx.profile.support_geometry_shader_passthrough) { + ctx.AddExtension("SPV_NV_geometry_shader_passthrough"); + ctx.AddCapability(spv::Capability::GeometryShaderPassthroughNV); + } else { + LOG_WARNING(Shader_SPIRV, "Geometry shader passthrough used with no support"); + } + } + break; + case Stage::Fragment: + execution_model = spv::ExecutionModel::Fragment; + if (ctx.profile.lower_left_origin_mode) { + ctx.AddExecutionMode(main, spv::ExecutionMode::OriginLowerLeft); + } else { + ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); + } + if (program.info.stores_frag_depth) { + ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); + } + if (ctx.runtime_info.force_early_z) { + ctx.AddExecutionMode(main, spv::ExecutionMode::EarlyFragmentTests); + } + break; + default: + throw NotImplementedException("Stage {}", program.stage); + } + ctx.AddEntryPoint(execution_model, main, "main", interfaces); +} + +void SetupDenormControl(const Profile& profile, const IR::Program& program, EmitContext& ctx, + Id main_func) { + const Info& info{program.info}; + if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) { + LOG_DEBUG(Shader_SPIRV, "Fp32 denorm flush and preserve on the same shader"); + } else if (info.uses_fp32_denorms_flush) { + if (profile.support_fp32_denorm_flush) { + ctx.AddCapability(spv::Capability::DenormFlushToZero); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 32U); + } else { + // Drivers will most likely flush denorms by default, no need to warn + } + } else if (info.uses_fp32_denorms_preserve) { + if (profile.support_fp32_denorm_preserve) { + ctx.AddCapability(spv::Capability::DenormPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 32U); + } else { + LOG_DEBUG(Shader_SPIRV, "Fp32 denorm preserve used in shader without host support"); + } + } + if (!profile.support_separate_denorm_behavior || profile.has_broken_fp16_float_controls) { + // No separate 
denorm behavior + return; + } + if (info.uses_fp16_denorms_flush && info.uses_fp16_denorms_preserve) { + LOG_DEBUG(Shader_SPIRV, "Fp16 denorm flush and preserve on the same shader"); + } else if (info.uses_fp16_denorms_flush) { + if (profile.support_fp16_denorm_flush) { + ctx.AddCapability(spv::Capability::DenormFlushToZero); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 16U); + } else { + // Same as fp32, no need to warn as most drivers will flush by default + } + } else if (info.uses_fp16_denorms_preserve) { + if (profile.support_fp16_denorm_preserve) { + ctx.AddCapability(spv::Capability::DenormPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U); + } else { + LOG_DEBUG(Shader_SPIRV, "Fp16 denorm preserve used in shader without host support"); + } + } +} + +void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& program, + EmitContext& ctx, Id main_func) { + if (profile.has_broken_fp16_float_controls && program.info.uses_fp16) { + return; + } + if (program.info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) { + ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 16U); + } + if (profile.support_fp32_signed_zero_nan_preserve) { + ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 32U); + } + if (program.info.uses_fp64 && profile.support_fp64_signed_zero_nan_preserve) { + ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 64U); + } +} + +void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ctx) { + if (info.uses_sampled_1d) { + ctx.AddCapability(spv::Capability::Sampled1D); + } + if (info.uses_sparse_residency) { + ctx.AddCapability(spv::Capability::SparseResidency); + } + if (info.uses_demote_to_helper_invocation && profile.support_demote_to_helper_invocation) { + ctx.AddExtension("SPV_EXT_demote_to_helper_invocation"); + ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT); + } + if (info.stores[IR::Attribute::ViewportIndex]) { + ctx.AddCapability(spv::Capability::MultiViewport); + } + if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { + ctx.AddExtension("SPV_NV_viewport_array2"); + ctx.AddCapability(spv::Capability::ShaderViewportMaskNV); + } + if (info.stores[IR::Attribute::Layer] || info.stores[IR::Attribute::ViewportIndex]) { + if (profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) { + ctx.AddExtension("SPV_EXT_shader_viewport_index_layer"); + ctx.AddCapability(spv::Capability::ShaderViewportIndexLayerEXT); + } + } + if (!profile.support_vertex_instance_id && + (info.loads[IR::Attribute::InstanceId] || info.loads[IR::Attribute::VertexId])) { + ctx.AddExtension("SPV_KHR_shader_draw_parameters"); + ctx.AddCapability(spv::Capability::DrawParameters); + } + if ((info.uses_subgroup_vote || info.uses_subgroup_invocation_id || + info.uses_subgroup_shuffles) && + profile.support_vote) { + ctx.AddExtension("SPV_KHR_shader_ballot"); + ctx.AddCapability(spv::Capability::SubgroupBallotKHR); + if (!profile.warp_size_potentially_larger_than_guest) { + // vote ops are only used when not taking the long path + ctx.AddExtension("SPV_KHR_subgroup_vote"); + ctx.AddCapability(spv::Capability::SubgroupVoteKHR); + } + } + if 
(info.uses_int64_bit_atomics && profile.support_int64_atomics) { + ctx.AddCapability(spv::Capability::Int64Atomics); + } + if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) { + ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat); + } + if (info.uses_typeless_image_writes) { + ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat); + } + if (info.uses_image_buffers) { + ctx.AddCapability(spv::Capability::ImageBuffer); + } + if (info.uses_sample_id) { + ctx.AddCapability(spv::Capability::SampleRateShading); + } + if (!ctx.runtime_info.xfb_varyings.empty()) { + ctx.AddCapability(spv::Capability::TransformFeedback); + } + if (info.uses_derivatives) { + ctx.AddCapability(spv::Capability::DerivativeControl); + } + // TODO: Track this usage + ctx.AddCapability(spv::Capability::ImageGatherExtended); + ctx.AddCapability(spv::Capability::ImageQuery); + ctx.AddCapability(spv::Capability::SampledBuffer); +} + +void PatchPhiNodes(IR::Program& program, EmitContext& ctx) { + auto inst{program.blocks.front()->begin()}; + size_t block_index{0}; + ctx.PatchDeferredPhi([&](size_t phi_arg) { + if (phi_arg == 0) { + ++inst; + if (inst == program.blocks[block_index]->end() || + inst->GetOpcode() != IR::Opcode::Phi) { + do { + ++block_index; + inst = program.blocks[block_index]->begin(); + } while (inst->GetOpcode() != IR::Opcode::Phi); + } + } + return ctx.Def(inst->Arg(phi_arg)); + }); +} +} // Anonymous namespace + +std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program, Bindings& bindings) { + EmitContext ctx{profile, runtime_info, program, bindings}; + const Id main{DefineMain(ctx, program)}; + DefineEntryPoint(program, ctx, main); + if (profile.support_float_controls) { + ctx.AddExtension("SPV_KHR_float_controls"); + SetupDenormControl(profile, program, ctx, main); + SetupSignedNanCapabilities(profile, program, ctx, main); + } + SetupCapabilities(profile, program.info, ctx); + PatchPhiNodes(program, ctx); + return ctx.Assemble(); +} + +Id EmitPhi(EmitContext& ctx, IR::Inst* inst) { + const size_t num_args{inst->NumArgs()}; + boost::container::small_vector<Id, 32> blocks; + blocks.reserve(num_args); + for (size_t index = 0; index < num_args; ++index) { + blocks.push_back(inst->PhiBlock(index)->Definition<Id>()); + } + // The type of a phi instruction is stored in its flags + const Id result_type{TypeId(ctx, inst->Flags<IR::Type>())}; + return ctx.DeferredOpPhi(result_type, std::span(blocks.data(), blocks.size())); +} + +void EmitVoid(EmitContext&) {} + +Id EmitIdentity(EmitContext& ctx, const IR::Value& value) { + const Id id{ctx.Def(value)}; + if (!Sirit::ValidId(id)) { + throw NotImplementedException("Forward identity declaration"); + } + return id; +} + +Id EmitConditionRef(EmitContext& ctx, const IR::Value& value) { + const Id id{ctx.Def(value)}; + if (!Sirit::ValidId(id)) { + throw NotImplementedException("Forward identity declaration"); + } + return id; +} + +void EmitReference(EmitContext&) {} + +void EmitPhiMove(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitGetZeroFromOp(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitGetSignFromOp(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitGetCarryFromOp(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitGetOverflowFromOp(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitGetSparseFromOp(EmitContext&) { + throw 
LogicError("Unreachable instruction"); +} + +void EmitGetInBoundsFromOp(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h new file mode 100644 index 000000000..db0c935fe --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -0,0 +1,27 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <vector> + +#include <sirit/sirit.h> + +#include "common/common_types.h" +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/backend/spirv/emit_context.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::SPIRV { + +[[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program, Bindings& bindings); + +[[nodiscard]] inline std::vector<u32> EmitSPIRV(const Profile& profile, IR::Program& program) { + Bindings binding; + return EmitSPIRV(profile, {}, program, binding); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp new file mode 100644 index 000000000..9af8bb9e1 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -0,0 +1,448 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { +namespace { +Id SharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0) { + const Id shift_id{ctx.Const(2U)}; + Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + if (index_offset > 0) { + index = ctx.OpIAdd(ctx.U32[1], index, ctx.Const(index_offset)); + } + return ctx.profile.support_explicit_workgroup_layout + ? 
ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index) + : ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index); +} + +Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) { + if (offset.IsImmediate()) { + const u32 imm_offset{static_cast<u32>(offset.U32() / element_size)}; + return ctx.Const(imm_offset); + } + const u32 shift{static_cast<u32>(std::countr_zero(element_size))}; + const Id index{ctx.Def(offset)}; + if (shift == 0) { + return index; + } + const Id shift_id{ctx.Const(shift)}; + return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); +} + +Id StoragePointer(EmitContext& ctx, const StorageTypeDefinition& type_def, + Id StorageDefinitions::*member_ptr, const IR::Value& binding, + const IR::Value& offset, size_t element_size) { + if (!binding.IsImmediate()) { + throw NotImplementedException("Dynamic storage buffer indexing"); + } + const Id ssbo{ctx.ssbos[binding.U32()].*member_ptr}; + const Id index{StorageIndex(ctx, offset, element_size)}; + return ctx.OpAccessChain(type_def.element, ssbo, ctx.u32_zero_value, index); +} + +std::pair<Id, Id> AtomicArgs(EmitContext& ctx) { + const Id scope{ctx.Const(static_cast<u32>(spv::Scope::Device))}; + const Id semantics{ctx.u32_zero_value}; + return {scope, semantics}; +} + +Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value, + Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { + const Id pointer{SharedPointer(ctx, offset)}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value); +} + +Id StorageAtomicU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, + Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { + const Id pointer{StoragePointer(ctx, ctx.storage_types.U32, &StorageDefinitions::U32, binding, + offset, sizeof(u32))}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value); +} + +Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, + Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id), + Id (Sirit::Module::*non_atomic_func)(Id, Id, Id)) { + if (ctx.profile.support_int64_atomics) { + const Id pointer{StoragePointer(ctx, ctx.storage_types.U64, &StorageDefinitions::U64, + binding, offset, sizeof(u64))}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return (ctx.*atomic_func)(ctx.U64, pointer, scope, semantics, value); + } + LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); + const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, + binding, offset, sizeof(u32[2]))}; + const Id original_value{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))}; + const Id result{(ctx.*non_atomic_func)(ctx.U64, value, original_value)}; + ctx.OpStore(pointer, ctx.OpBitcast(ctx.U32[2], result)); + return original_value; +} +} // Anonymous namespace + +Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicIAdd); +} + +Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMin); +} + +Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMin); +} + +Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, 
offset, value, &Sirit::Module::OpAtomicSMax); +} + +Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMax); +} + +Id EmitSharedAtomicInc32(EmitContext& ctx, Id offset, Id value) { + const Id shift_id{ctx.Const(2U)}; + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_shared, index, value); +} + +Id EmitSharedAtomicDec32(EmitContext& ctx, Id offset, Id value) { + const Id shift_id{ctx.Const(2U)}; + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_shared, index, value); +} + +Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicAnd); +} + +Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicOr); +} + +Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicXor); +} + +Id EmitSharedAtomicExchange32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicExchange); +} + +Id EmitSharedAtomicExchange64(EmitContext& ctx, Id offset, Id value) { + if (ctx.profile.support_int64_atomics && ctx.profile.support_explicit_workgroup_layout) { + const Id shift_id{ctx.Const(3U)}; + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + const Id pointer{ + ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value); + } + LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); + const Id pointer_1{SharedPointer(ctx, offset, 0)}; + const Id pointer_2{SharedPointer(ctx, offset, 1)}; + const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)}; + const Id value_2{ctx.OpLoad(ctx.U32[1], pointer_2)}; + const Id new_vector{ctx.OpBitcast(ctx.U32[2], value)}; + ctx.OpStore(pointer_1, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 0U)); + ctx.OpStore(pointer_2, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 1U)); + return ctx.OpBitcast(ctx.U64, ctx.OpCompositeConstruct(ctx.U32[2], value_1, value_2)); +} + +Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicIAdd); +} + +Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMin); +} + +Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMin); +} + +Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMax); +} + +Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMax); +} + +Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id 
value) { + const Id ssbo{ctx.ssbos[binding.U32()].U32}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_ssbo, base_index, value, ssbo); +} + +Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()].U32}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_ssbo, base_index, value, ssbo); +} + +Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicAnd); +} + +Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicOr); +} + +Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicXor); +} + +Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicExchange); +} + +Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicIAdd, + &Sirit::Module::OpIAdd); +} + +Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMin, + &Sirit::Module::OpSMin); +} + +Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMin, + &Sirit::Module::OpUMin); +} + +Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMax, + &Sirit::Module::OpSMax); +} + +Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMax, + &Sirit::Module::OpUMax); +} + +Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicAnd, + &Sirit::Module::OpBitwiseAnd); +} + +Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicOr, + &Sirit::Module::OpBitwiseOr); +} + +Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicXor, + &Sirit::Module::OpBitwiseXor); +} + +Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + if (ctx.profile.support_int64_atomics) { + const Id pointer{StoragePointer(ctx, ctx.storage_types.U64, &StorageDefinitions::U64, + binding, offset, sizeof(u64))}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return ctx.OpAtomicExchange(ctx.U64, pointer, scope, 
semantics, value); + } + LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); + const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, + binding, offset, sizeof(u32[2]))}; + const Id original{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))}; + ctx.OpStore(pointer, value); + return original; +} + +Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()].U32}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + return ctx.OpFunctionCall(ctx.F32[1], ctx.f32_add_cas, base_index, value, ssbo); +} + +Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()].U32}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_add_cas, base_index, value, ssbo)}; + return ctx.OpBitcast(ctx.U32[1], result); +} + +Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()].U32}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_add_cas, base_index, value, ssbo)}; + return ctx.OpPackHalf2x16(ctx.U32[1], result); +} + +Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()].U32}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_min_cas, base_index, value, ssbo)}; + return ctx.OpBitcast(ctx.U32[1], result); +} + +Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()].U32}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_min_cas, base_index, value, ssbo)}; + return ctx.OpPackHalf2x16(ctx.U32[1], result); +} + +Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()].U32}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_max_cas, base_index, value, ssbo)}; + return ctx.OpBitcast(ctx.U32[1], result); +} + +Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()].U32}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_max_cas, base_index, value, ssbo)}; + return ctx.OpPackHalf2x16(ctx.U32[1], result); +} + +Id EmitGlobalAtomicIAdd32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicSMin32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicUMin32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicSMax32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicUMax32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicInc32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicDec32(EmitContext&) { + 
throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicAnd32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicOr32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicXor32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicExchange32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicIAdd64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicSMin64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicUMin64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicSMax64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicUMax64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicInc64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicDec64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicAnd64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicOr64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicXor64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicExchange64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicAddF32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicAddF16x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicAddF32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicMinF16x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicMinF32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicMaxF16x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicMaxF32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp new file mode 100644 index 000000000..e0b52a001 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp @@ -0,0 +1,38 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
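
The 64-bit storage atomics above all funnel through StorageAtomicU64, which falls back to a plain read-modify-write over a u32x2 view of the buffer whenever Profile::support_int64_atomics is false; only the combining step changes between IAdd, the min/max variants, and the bitwise ops. A minimal host-side sketch of that fallback shape (illustration only; FallbackAtomicAdd64 and its parameters are hypothetical and not part of this patch):

#include <cstdint>
#include <cstring>

// Sketch of the non-atomic fallback used when 64-bit atomics are unavailable:
// read the value through two 32-bit words, combine non-atomically, write back,
// and return the original value as a real atomic would.
uint64_t FallbackAtomicAdd64(uint32_t* ssbo_words, uint64_t value) {
    uint64_t original{};
    std::memcpy(&original, ssbo_words, sizeof(original));  // OpLoad(U32[2]) + OpBitcast(U64)
    const uint64_t result = original + value;              // non-atomic OpIAdd
    std::memcpy(ssbo_words, &result, sizeof(result));      // OpBitcast(U32[2]) + OpStore
    return original;
}
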
+ +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" + +namespace Shader::Backend::SPIRV { +namespace { +void MemoryBarrier(EmitContext& ctx, spv::Scope scope) { + const auto semantics{ + spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory | + spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AtomicCounterMemory | + spv::MemorySemanticsMask::ImageMemory}; + ctx.OpMemoryBarrier(ctx.Const(static_cast<u32>(scope)), ctx.Const(static_cast<u32>(semantics))); +} +} // Anonymous namespace + +void EmitBarrier(EmitContext& ctx) { + const auto execution{spv::Scope::Workgroup}; + const auto memory{spv::Scope::Workgroup}; + const auto memory_semantics{spv::MemorySemanticsMask::AcquireRelease | + spv::MemorySemanticsMask::WorkgroupMemory}; + ctx.OpControlBarrier(ctx.Const(static_cast<u32>(execution)), + ctx.Const(static_cast<u32>(memory)), + ctx.Const(static_cast<u32>(memory_semantics))); +} + +void EmitWorkgroupMemoryBarrier(EmitContext& ctx) { + MemoryBarrier(ctx, spv::Scope::Workgroup); +} + +void EmitDeviceMemoryBarrier(EmitContext& ctx) { + MemoryBarrier(ctx, spv::Scope::Device); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp new file mode 100644 index 000000000..bb11f4f4e --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp @@ -0,0 +1,66 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { + +void EmitBitCastU16F16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBitCastU32F32(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.U32[1], value); +} + +void EmitBitCastU64F64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitBitCastF16U16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBitCastF32U32(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.F32[1], value); +} + +void EmitBitCastF64U64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitPackUint2x32(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.U64, value); +} + +Id EmitUnpackUint2x32(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.U32[2], value); +} + +Id EmitPackFloat2x16(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.U32[1], value); +} + +Id EmitUnpackFloat2x16(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.F16[2], value); +} + +Id EmitPackHalf2x16(EmitContext& ctx, Id value) { + return ctx.OpPackHalf2x16(ctx.U32[1], value); +} + +Id EmitUnpackHalf2x16(EmitContext& ctx, Id value) { + return ctx.OpUnpackHalf2x16(ctx.F32[2], value); +} + +Id EmitPackDouble2x32(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.F64[1], value); +} + +Id EmitUnpackDouble2x32(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.U32[2], value); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp new file mode 100644 index 
000000000..10ff4ecab --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp @@ -0,0 +1,155 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" + +namespace Shader::Backend::SPIRV { + +Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2) { + return ctx.OpCompositeConstruct(ctx.U32[2], e1, e2); +} + +Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3) { + return ctx.OpCompositeConstruct(ctx.U32[3], e1, e2, e3); +} + +Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { + return ctx.OpCompositeConstruct(ctx.U32[4], e1, e2, e3, e4); +} + +Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.U32[1], composite, index); +} + +Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.U32[1], composite, index); +} + +Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.U32[1], composite, index); +} + +Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.U32[2], object, composite, index); +} + +Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.U32[3], object, composite, index); +} + +Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index); +} + +Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2) { + return ctx.OpCompositeConstruct(ctx.F16[2], e1, e2); +} + +Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3) { + return ctx.OpCompositeConstruct(ctx.F16[3], e1, e2, e3); +} + +Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { + return ctx.OpCompositeConstruct(ctx.F16[4], e1, e2, e3, e4); +} + +Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F16[1], composite, index); +} + +Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F16[1], composite, index); +} + +Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F16[1], composite, index); +} + +Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F16[2], object, composite, index); +} + +Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F16[3], object, composite, index); +} + +Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index); +} + +Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2) { + return ctx.OpCompositeConstruct(ctx.F32[2], e1, e2); +} + +Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3) { + return ctx.OpCompositeConstruct(ctx.F32[3], e1, e2, e3); +} + +Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { + return ctx.OpCompositeConstruct(ctx.F32[4], e1, e2, e3, e4); +} + +Id 
EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F32[1], composite, index); +} + +Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F32[1], composite, index); +} + +Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F32[1], composite, index); +} + +Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F32[2], object, composite, index); +} + +Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F32[3], object, composite, index); +} + +Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F32[4], object, composite, index); +} + +void EmitCompositeConstructF64x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitCompositeConstructF64x3(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitCompositeConstructF64x4(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitCompositeExtractF64x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitCompositeExtractF64x3(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitCompositeExtractF64x4(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F64[2], object, composite, index); +} + +Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F64[3], object, composite, index); +} + +Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp new file mode 100644 index 000000000..fb8c02a77 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -0,0 +1,505 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <tuple> +#include <utility> + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { +namespace { +struct AttrInfo { + Id pointer; + Id id; + bool needs_cast; +}; + +std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) { + const AttributeType type{ctx.runtime_info.generic_input_types.at(index)}; + switch (type) { + case AttributeType::Float: + return AttrInfo{ctx.input_f32, ctx.F32[1], false}; + case AttributeType::UnsignedInt: + return AttrInfo{ctx.input_u32, ctx.U32[1], true}; + case AttributeType::SignedInt: + return AttrInfo{ctx.input_s32, ctx.TypeInt(32, true), true}; + case AttributeType::Disabled: + return std::nullopt; + } + throw InvalidArgument("Invalid attribute type {}", type); +} + +template <typename... Args> +Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&... 
args) { + switch (ctx.stage) { + case Stage::TessellationControl: + case Stage::TessellationEval: + case Stage::Geometry: + return ctx.OpAccessChain(pointer_type, base, vertex, std::forward<Args>(args)...); + default: + return ctx.OpAccessChain(pointer_type, base, std::forward<Args>(args)...); + } +} + +template <typename... Args> +Id OutputAccessChain(EmitContext& ctx, Id result_type, Id base, Args&&... args) { + if (ctx.stage == Stage::TessellationControl) { + const Id invocation_id{ctx.OpLoad(ctx.U32[1], ctx.invocation_id)}; + return ctx.OpAccessChain(result_type, base, invocation_id, std::forward<Args>(args)...); + } else { + return ctx.OpAccessChain(result_type, base, std::forward<Args>(args)...); + } +} + +struct OutAttr { + OutAttr(Id pointer_) : pointer{pointer_} {} + OutAttr(Id pointer_, Id type_) : pointer{pointer_}, type{type_} {} + + Id pointer{}; + Id type{}; +}; + +std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + const u32 element{IR::GenericAttributeElement(attr)}; + const GenericElementInfo& info{ctx.output_generics.at(index).at(element)}; + if (info.num_components == 1) { + return info.id; + } else { + const u32 index_element{element - info.first_element}; + const Id index_id{ctx.Const(index_element)}; + return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id); + } + } + switch (attr) { + case IR::Attribute::PointSize: + return ctx.output_point_size; + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: { + const u32 element{static_cast<u32>(attr) % 4}; + const Id element_id{ctx.Const(element)}; + return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id); + } + case IR::Attribute::ClipDistance0: + case IR::Attribute::ClipDistance1: + case IR::Attribute::ClipDistance2: + case IR::Attribute::ClipDistance3: + case IR::Attribute::ClipDistance4: + case IR::Attribute::ClipDistance5: + case IR::Attribute::ClipDistance6: + case IR::Attribute::ClipDistance7: { + const u32 base{static_cast<u32>(IR::Attribute::ClipDistance0)}; + const u32 index{static_cast<u32>(attr) - base}; + const Id clip_num{ctx.Const(index)}; + return OutputAccessChain(ctx, ctx.output_f32, ctx.clip_distances, clip_num); + } + case IR::Attribute::Layer: + if (ctx.profile.support_viewport_index_layer_non_geometry || + ctx.stage == Shader::Stage::Geometry) { + return OutAttr{ctx.layer, ctx.U32[1]}; + } + return std::nullopt; + case IR::Attribute::ViewportIndex: + if (ctx.profile.support_viewport_index_layer_non_geometry || + ctx.stage == Shader::Stage::Geometry) { + return OutAttr{ctx.viewport_index, ctx.U32[1]}; + } + return std::nullopt; + case IR::Attribute::ViewportMask: + if (!ctx.profile.support_viewport_mask) { + return std::nullopt; + } + return OutAttr{ctx.OpAccessChain(ctx.output_u32, ctx.viewport_mask, ctx.u32_zero_value), + ctx.U32[1]}; + default: + throw NotImplementedException("Read attribute {}", attr); + } +} + +Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, u32 element_size, + const IR::Value& binding, const IR::Value& offset) { + if (!binding.IsImmediate()) { + throw NotImplementedException("Constant buffer indexing"); + } + const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr}; + const Id uniform_type{ctx.uniform_types.*member_ptr}; + if (!offset.IsImmediate()) { + Id index{ctx.Def(offset)}; + if (element_size > 1) { + const u32 
log2_element_size{static_cast<u32>(std::countr_zero(element_size))}; + const Id shift{ctx.Const(log2_element_size)}; + index = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift); + } + const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, index)}; + return ctx.OpLoad(result_type, access_chain); + } + // Hardware been proved to read the aligned offset (e.g. LDC.U32 at 6 will read offset 4) + const Id imm_offset{ctx.Const(offset.U32() / element_size)}; + const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, imm_offset)}; + return ctx.OpLoad(result_type, access_chain); +} + +Id GetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return GetCbuf(ctx, ctx.U32[1], &UniformDefinitions::U32, sizeof(u32), binding, offset); +} + +Id GetCbufU32x4(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return GetCbuf(ctx, ctx.U32[4], &UniformDefinitions::U32x4, sizeof(u32[4]), binding, offset); +} + +Id GetCbufElement(EmitContext& ctx, Id vector, const IR::Value& offset, u32 index_offset) { + if (offset.IsImmediate()) { + const u32 element{(offset.U32() / 4) % 4 + index_offset}; + return ctx.OpCompositeExtract(ctx.U32[1], vector, element); + } + const Id shift{ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), ctx.Const(2u))}; + Id element{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Const(3u))}; + if (index_offset > 0) { + element = ctx.OpIAdd(ctx.U32[1], element, ctx.Const(index_offset)); + } + return ctx.OpVectorExtractDynamic(ctx.U32[1], vector, element); +} +} // Anonymous namespace + +void EmitGetRegister(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitSetRegister(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitGetPred(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitSetPred(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitSetGotoVariable(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitGetGotoVariable(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitSetIndirectBranchVariable(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitGetIndirectBranchVariable(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int8) { + const Id load{GetCbuf(ctx, ctx.U8, &UniformDefinitions::U8, sizeof(u8), binding, offset)}; + return ctx.OpUConvert(ctx.U32[1], load); + } + Id element{}; + if (ctx.profile.support_descriptor_aliasing) { + element = GetCbufU32(ctx, binding, offset); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + element = GetCbufElement(ctx, vector, offset, 0u); + } + const Id bit_offset{ctx.BitOffset8(offset)}; + return ctx.OpBitFieldUExtract(ctx.U32[1], element, bit_offset, ctx.Const(8u)); +} + +Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int8) { + const Id load{GetCbuf(ctx, ctx.S8, &UniformDefinitions::S8, sizeof(s8), binding, offset)}; + return ctx.OpSConvert(ctx.U32[1], load); + } + Id element{}; + if (ctx.profile.support_descriptor_aliasing) { + element = GetCbufU32(ctx, binding, offset); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + element = GetCbufElement(ctx, 
vector, offset, 0u); + } + const Id bit_offset{ctx.BitOffset8(offset)}; + return ctx.OpBitFieldSExtract(ctx.U32[1], element, bit_offset, ctx.Const(8u)); +} + +Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int16) { + const Id load{ + GetCbuf(ctx, ctx.U16, &UniformDefinitions::U16, sizeof(u16), binding, offset)}; + return ctx.OpUConvert(ctx.U32[1], load); + } + Id element{}; + if (ctx.profile.support_descriptor_aliasing) { + element = GetCbufU32(ctx, binding, offset); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + element = GetCbufElement(ctx, vector, offset, 0u); + } + const Id bit_offset{ctx.BitOffset16(offset)}; + return ctx.OpBitFieldUExtract(ctx.U32[1], element, bit_offset, ctx.Const(16u)); +} + +Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int16) { + const Id load{ + GetCbuf(ctx, ctx.S16, &UniformDefinitions::S16, sizeof(s16), binding, offset)}; + return ctx.OpSConvert(ctx.U32[1], load); + } + Id element{}; + if (ctx.profile.support_descriptor_aliasing) { + element = GetCbufU32(ctx, binding, offset); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + element = GetCbufElement(ctx, vector, offset, 0u); + } + const Id bit_offset{ctx.BitOffset16(offset)}; + return ctx.OpBitFieldSExtract(ctx.U32[1], element, bit_offset, ctx.Const(16u)); +} + +Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_descriptor_aliasing) { + return GetCbufU32(ctx, binding, offset); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + return GetCbufElement(ctx, vector, offset, 0u); + } +} + +Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_descriptor_aliasing) { + return GetCbuf(ctx, ctx.F32[1], &UniformDefinitions::F32, sizeof(f32), binding, offset); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + return ctx.OpBitcast(ctx.F32[1], GetCbufElement(ctx, vector, offset, 0u)); + } +} + +Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_descriptor_aliasing) { + return GetCbuf(ctx, ctx.U32[2], &UniformDefinitions::U32x2, sizeof(u32[2]), binding, + offset); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + return ctx.OpCompositeConstruct(ctx.U32[2], GetCbufElement(ctx, vector, offset, 0u), + GetCbufElement(ctx, vector, offset, 1u)); + } +} + +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { + const u32 element{static_cast<u32>(attr) % 4}; + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + const std::optional<AttrInfo> type{AttrTypes(ctx, index)}; + if (!type) { + // Attribute is disabled + return ctx.Const(element == 3 ? 1.0f : 0.0f); + } + if (!ctx.runtime_info.previous_stage_stores.Generic(index, element)) { + // Varying component is not written + return ctx.Const(type && element == 3 ? 1.0f : 0.0f); + } + const Id generic_id{ctx.input_generics.at(index)}; + const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, ctx.Const(element))}; + const Id value{ctx.OpLoad(type->id, pointer)}; + return type->needs_cast ? 
ctx.OpBitcast(ctx.F32[1], value) : value; + } + switch (attr) { + case IR::Attribute::PrimitiveId: + return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.primitive_id)); + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, + ctx.Const(element))); + case IR::Attribute::InstanceId: + if (ctx.profile.support_vertex_instance_id) { + return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id)); + } else { + const Id index{ctx.OpLoad(ctx.U32[1], ctx.instance_index)}; + const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_instance)}; + return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base)); + } + case IR::Attribute::VertexId: + if (ctx.profile.support_vertex_instance_id) { + return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.vertex_id)); + } else { + const Id index{ctx.OpLoad(ctx.U32[1], ctx.vertex_index)}; + const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)}; + return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base)); + } + case IR::Attribute::FrontFace: + return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1, ctx.front_face), + ctx.Const(std::numeric_limits<u32>::max()), ctx.u32_zero_value); + case IR::Attribute::PointSpriteS: + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.u32_zero_value)); + case IR::Attribute::PointSpriteT: + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.Const(1U))); + case IR::Attribute::TessellationEvaluationPointU: + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value)); + case IR::Attribute::TessellationEvaluationPointV: + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.Const(1U))); + + default: + throw NotImplementedException("Read attribute {}", attr); + } +} + +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, [[maybe_unused]] Id vertex) { + const std::optional<OutAttr> output{OutputAttrPointer(ctx, attr)}; + if (!output) { + return; + } + if (Sirit::ValidId(output->type)) { + value = ctx.OpBitcast(output->type, value); + } + ctx.OpStore(output->pointer, value); +} + +Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex) { + switch (ctx.stage) { + case Stage::TessellationControl: + case Stage::TessellationEval: + case Stage::Geometry: + return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset, vertex); + default: + return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset); + } +} + +void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, [[maybe_unused]] Id vertex) { + ctx.OpFunctionCall(ctx.void_id, ctx.indexed_store_func, offset, value); +} + +Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) { + if (!IR::IsGeneric(patch)) { + throw NotImplementedException("Non-generic patch load"); + } + const u32 index{IR::GenericPatchIndex(patch)}; + const Id element{ctx.Const(IR::GenericPatchElement(patch))}; + const Id type{ctx.stage == Stage::TessellationControl ? 
ctx.output_f32 : ctx.input_f32}; + const Id pointer{ctx.OpAccessChain(type, ctx.patches.at(index), element)}; + return ctx.OpLoad(ctx.F32[1], pointer); +} + +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) { + const Id pointer{[&] { + if (IR::IsGeneric(patch)) { + const u32 index{IR::GenericPatchIndex(patch)}; + const Id element{ctx.Const(IR::GenericPatchElement(patch))}; + return ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(index), element); + } + switch (patch) { + case IR::Patch::TessellationLodLeft: + case IR::Patch::TessellationLodRight: + case IR::Patch::TessellationLodTop: + case IR::Patch::TessellationLodBottom: { + const u32 index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)}; + const Id index_id{ctx.Const(index)}; + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_outer, index_id); + } + case IR::Patch::TessellationLodInteriorU: + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, + ctx.u32_zero_value); + case IR::Patch::TessellationLodInteriorV: + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, ctx.Const(1u)); + default: + throw NotImplementedException("Patch {}", patch); + } + }()}; + ctx.OpStore(pointer, value); +} + +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) { + const Id component_id{ctx.Const(component)}; + const Id pointer{ctx.OpAccessChain(ctx.output_f32, ctx.frag_color.at(index), component_id)}; + ctx.OpStore(pointer, value); +} + +void EmitSetSampleMask(EmitContext& ctx, Id value) { + ctx.OpStore(ctx.sample_mask, value); +} + +void EmitSetFragDepth(EmitContext& ctx, Id value) { + ctx.OpStore(ctx.frag_depth, value); +} + +void EmitGetZFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitGetSFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitGetCFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitGetOFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSetZFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSetSFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSetCFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSetOFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitWorkgroupId(EmitContext& ctx) { + return ctx.OpLoad(ctx.U32[3], ctx.workgroup_id); +} + +Id EmitLocalInvocationId(EmitContext& ctx) { + return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id); +} + +Id EmitInvocationId(EmitContext& ctx) { + return ctx.OpLoad(ctx.U32[1], ctx.invocation_id); +} + +Id EmitSampleId(EmitContext& ctx) { + return ctx.OpLoad(ctx.U32[1], ctx.sample_id); +} + +Id EmitIsHelperInvocation(EmitContext& ctx) { + return ctx.OpLoad(ctx.U1, ctx.is_helper_invocation); +} + +Id EmitYDirection(EmitContext& ctx) { + return ctx.Const(ctx.runtime_info.y_negate ? 
-1.0f : 1.0f); +} + +Id EmitLoadLocal(EmitContext& ctx, Id word_offset) { + const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)}; + return ctx.OpLoad(ctx.U32[1], pointer); +} + +void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value) { + const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)}; + ctx.OpStore(pointer, value); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp new file mode 100644 index 000000000..d33486f28 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -0,0 +1,28 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { + +void EmitJoin(EmitContext&) { + throw NotImplementedException("Join shouldn't be emitted"); +} + +void EmitDemoteToHelperInvocation(EmitContext& ctx) { + if (ctx.profile.support_demote_to_helper_invocation) { + ctx.OpDemoteToHelperInvocationEXT(); + } else { + const Id kill_label{ctx.OpLabel()}; + const Id impossible_label{ctx.OpLabel()}; + ctx.OpSelectionMerge(impossible_label, spv::SelectionControlMask::MaskNone); + ctx.OpBranchConditional(ctx.true_value, kill_label, impossible_label); + ctx.AddLabel(kill_label); + ctx.OpKill(); + ctx.AddLabel(impossible_label); + } +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp new file mode 100644 index 000000000..fd42b7a16 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp @@ -0,0 +1,269 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
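+ +// Note: when the host lacks native Int8/Int16 support, the 8-bit and 16-bit conversions below +// are emulated with 32-bit OpBitFieldUExtract/OpBitFieldSExtract (see the Extract helpers).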
+ +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { +namespace { +Id ExtractU16(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.U16, value); + } else { + return ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(16u)); + } +} + +Id ExtractS16(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpSConvert(ctx.S16, value); + } else { + return ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(16u)); + } +} + +Id ExtractU8(EmitContext& ctx, Id value) { + if (ctx.profile.support_int8) { + return ctx.OpUConvert(ctx.U8, value); + } else { + return ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(8u)); + } +} + +Id ExtractS8(EmitContext& ctx, Id value) { + if (ctx.profile.support_int8) { + return ctx.OpSConvert(ctx.S8, value); + } else { + return ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(8u)); + } +} +} // Anonymous namespace + +Id EmitConvertS16F16(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); + } else { + return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value)); + } +} + +Id EmitConvertS16F32(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); + } else { + return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value)); + } +} + +Id EmitConvertS16F64(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); + } else { + return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value)); + } +} + +Id EmitConvertS32F16(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U32[1], value); +} + +Id EmitConvertS32F32(EmitContext& ctx, Id value) { + if (ctx.profile.has_broken_signed_operations) { + return ctx.OpBitcast(ctx.U32[1], ctx.OpConvertFToS(ctx.S32[1], value)); + } else { + return ctx.OpConvertFToS(ctx.U32[1], value); + } +} + +Id EmitConvertS32F64(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U32[1], value); +} + +Id EmitConvertS64F16(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U64, value); +} + +Id EmitConvertS64F32(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U64, value); +} + +Id EmitConvertS64F64(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U64, value); +} + +Id EmitConvertU16F16(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); + } else { + return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value)); + } +} + +Id EmitConvertU16F32(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); + } else { + return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value)); + } +} + +Id EmitConvertU16F64(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); + } else { + return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value)); + } +} + +Id EmitConvertU32F16(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U32[1], value); +} + +Id EmitConvertU32F32(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U32[1], value); +} + +Id 
EmitConvertU32F64(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U32[1], value); +} + +Id EmitConvertU64F16(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U64, value); +} + +Id EmitConvertU64F32(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U64, value); +} + +Id EmitConvertU64F64(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U64, value); +} + +Id EmitConvertU64U32(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U64, value); +} + +Id EmitConvertU32U64(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U32[1], value); +} + +Id EmitConvertF16F32(EmitContext& ctx, Id value) { + return ctx.OpFConvert(ctx.F16[1], value); +} + +Id EmitConvertF32F16(EmitContext& ctx, Id value) { + return ctx.OpFConvert(ctx.F32[1], value); +} + +Id EmitConvertF32F64(EmitContext& ctx, Id value) { + return ctx.OpFConvert(ctx.F32[1], value); +} + +Id EmitConvertF64F32(EmitContext& ctx, Id value) { + return ctx.OpFConvert(ctx.F64[1], value); +} + +Id EmitConvertF16S8(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F16[1], ExtractS8(ctx, value)); +} + +Id EmitConvertF16S16(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F16[1], ExtractS16(ctx, value)); +} + +Id EmitConvertF16S32(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F16[1], value); +} + +Id EmitConvertF16S64(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F16[1], value); +} + +Id EmitConvertF16U8(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F16[1], ExtractU8(ctx, value)); +} + +Id EmitConvertF16U16(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F16[1], ExtractU16(ctx, value)); +} + +Id EmitConvertF16U32(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F16[1], value); +} + +Id EmitConvertF16U64(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F16[1], value); +} + +Id EmitConvertF32S8(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F32[1], ExtractS8(ctx, value)); +} + +Id EmitConvertF32S16(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F32[1], ExtractS16(ctx, value)); +} + +Id EmitConvertF32S32(EmitContext& ctx, Id value) { + if (ctx.profile.has_broken_signed_operations) { + value = ctx.OpBitcast(ctx.S32[1], value); + } + return ctx.OpConvertSToF(ctx.F32[1], value); +} + +Id EmitConvertF32S64(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F32[1], value); +} + +Id EmitConvertF32U8(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F32[1], ExtractU8(ctx, value)); +} + +Id EmitConvertF32U16(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F32[1], ExtractU16(ctx, value)); +} + +Id EmitConvertF32U32(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F32[1], value); +} + +Id EmitConvertF32U64(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F32[1], value); +} + +Id EmitConvertF64S8(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F64[1], ExtractS8(ctx, value)); +} + +Id EmitConvertF64S16(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F64[1], ExtractS16(ctx, value)); +} + +Id EmitConvertF64S32(EmitContext& ctx, Id value) { + if (ctx.profile.has_broken_signed_operations) { + value = ctx.OpBitcast(ctx.S32[1], value); + } + return ctx.OpConvertSToF(ctx.F64[1], value); +} + +Id EmitConvertF64S64(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F64[1], value); +} + +Id EmitConvertF64U8(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F64[1], ExtractU8(ctx, 
value)); +} + +Id EmitConvertF64U16(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F64[1], ExtractU16(ctx, value)); +} + +Id EmitConvertF64U32(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F64[1], value); +} + +Id EmitConvertF64U64(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F64[1], value); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp new file mode 100644 index 000000000..61cf25f9c --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -0,0 +1,396 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" + +namespace Shader::Backend::SPIRV { +namespace { +Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { + const auto flags{inst->Flags<IR::FpControl>()}; + if (flags.no_contraction) { + ctx.Decorate(op, spv::Decoration::NoContraction); + } + return op; +} + +Id Clamp(EmitContext& ctx, Id type, Id value, Id zero, Id one) { + if (ctx.profile.has_broken_spirv_clamp) { + return ctx.OpFMin(type, ctx.OpFMax(type, value, zero), one); + } else { + return ctx.OpFClamp(type, value, zero, one); + } +} + +Id FPOrdNotEqual(EmitContext& ctx, Id lhs, Id rhs) { + if (ctx.profile.ignore_nan_fp_comparisons) { + const Id comp{ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs)}; + const Id lhs_not_nan{ctx.OpLogicalNot(ctx.U1, ctx.OpIsNan(ctx.U1, lhs))}; + const Id rhs_not_nan{ctx.OpLogicalNot(ctx.U1, ctx.OpIsNan(ctx.U1, rhs))}; + return ctx.OpLogicalAnd(ctx.U1, ctx.OpLogicalAnd(ctx.U1, comp, lhs_not_nan), rhs_not_nan); + } else { + return ctx.OpFOrdNotEqual(ctx.U1, lhs, rhs); + } +} + +Id FPUnordCompare(Id (EmitContext::*comp_func)(Id, Id, Id), EmitContext& ctx, Id lhs, Id rhs) { + if (ctx.profile.ignore_nan_fp_comparisons) { + const Id lhs_nan{ctx.OpIsNan(ctx.U1, lhs)}; + const Id rhs_nan{ctx.OpIsNan(ctx.U1, rhs)}; + const Id comp{(ctx.*comp_func)(ctx.U1, lhs, rhs)}; + return ctx.OpLogicalOr(ctx.U1, ctx.OpLogicalOr(ctx.U1, comp, lhs_nan), rhs_nan); + } else { + return (ctx.*comp_func)(ctx.U1, lhs, rhs); + } +} +} // Anonymous namespace + +Id EmitFPAbs16(EmitContext& ctx, Id value) { + return ctx.OpFAbs(ctx.F16[1], value); +} + +Id EmitFPAbs32(EmitContext& ctx, Id value) { + return ctx.OpFAbs(ctx.F32[1], value); +} + +Id EmitFPAbs64(EmitContext& ctx, Id value) { + return ctx.OpFAbs(ctx.F64[1], value); +} + +Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFAdd(ctx.F16[1], a, b)); +} + +Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFAdd(ctx.F32[1], a, b)); +} + +Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFAdd(ctx.F64[1], a, b)); +} + +Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { + return Decorate(ctx, inst, ctx.OpFma(ctx.F16[1], a, b, c)); +} + +Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { + return Decorate(ctx, inst, ctx.OpFma(ctx.F32[1], a, b, c)); +} + +Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { + return Decorate(ctx, inst, ctx.OpFma(ctx.F64[1], a, b, c)); +} + +Id EmitFPMax32(EmitContext& ctx, Id a, Id b) { + return
ctx.OpFMax(ctx.F32[1], a, b); +} + +Id EmitFPMax64(EmitContext& ctx, Id a, Id b) { + return ctx.OpFMax(ctx.F64[1], a, b); +} + +Id EmitFPMin32(EmitContext& ctx, Id a, Id b) { + return ctx.OpFMin(ctx.F32[1], a, b); +} + +Id EmitFPMin64(EmitContext& ctx, Id a, Id b) { + return ctx.OpFMin(ctx.F64[1], a, b); +} + +Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFMul(ctx.F16[1], a, b)); +} + +Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFMul(ctx.F32[1], a, b)); +} + +Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b)); +} + +Id EmitFPNeg16(EmitContext& ctx, Id value) { + return ctx.OpFNegate(ctx.F16[1], value); +} + +Id EmitFPNeg32(EmitContext& ctx, Id value) { + return ctx.OpFNegate(ctx.F32[1], value); +} + +Id EmitFPNeg64(EmitContext& ctx, Id value) { + return ctx.OpFNegate(ctx.F64[1], value); +} + +Id EmitFPSin(EmitContext& ctx, Id value) { + return ctx.OpSin(ctx.F32[1], value); +} + +Id EmitFPCos(EmitContext& ctx, Id value) { + return ctx.OpCos(ctx.F32[1], value); +} + +Id EmitFPExp2(EmitContext& ctx, Id value) { + return ctx.OpExp2(ctx.F32[1], value); +} + +Id EmitFPLog2(EmitContext& ctx, Id value) { + return ctx.OpLog2(ctx.F32[1], value); +} + +Id EmitFPRecip32(EmitContext& ctx, Id value) { + return ctx.OpFDiv(ctx.F32[1], ctx.Const(1.0f), value); +} + +Id EmitFPRecip64(EmitContext& ctx, Id value) { + return ctx.OpFDiv(ctx.F64[1], ctx.Constant(ctx.F64[1], 1.0f), value); +} + +Id EmitFPRecipSqrt32(EmitContext& ctx, Id value) { + return ctx.OpInverseSqrt(ctx.F32[1], value); +} + +Id EmitFPRecipSqrt64(EmitContext& ctx, Id value) { + return ctx.OpInverseSqrt(ctx.F64[1], value); +} + +Id EmitFPSqrt(EmitContext& ctx, Id value) { + return ctx.OpSqrt(ctx.F32[1], value); +} + +Id EmitFPSaturate16(EmitContext& ctx, Id value) { + const Id zero{ctx.Constant(ctx.F16[1], u16{0})}; + const Id one{ctx.Constant(ctx.F16[1], u16{0x3c00})}; + return Clamp(ctx, ctx.F16[1], value, zero, one); +} + +Id EmitFPSaturate32(EmitContext& ctx, Id value) { + const Id zero{ctx.Const(f32{0.0})}; + const Id one{ctx.Const(f32{1.0})}; + return Clamp(ctx, ctx.F32[1], value, zero, one); +} + +Id EmitFPSaturate64(EmitContext& ctx, Id value) { + const Id zero{ctx.Constant(ctx.F64[1], f64{0.0})}; + const Id one{ctx.Constant(ctx.F64[1], f64{1.0})}; + return Clamp(ctx, ctx.F64[1], value, zero, one); +} + +Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value) { + return Clamp(ctx, ctx.F16[1], value, min_value, max_value); +} + +Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value) { + return Clamp(ctx, ctx.F32[1], value, min_value, max_value); +} + +Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value) { + return Clamp(ctx, ctx.F64[1], value, min_value, max_value); +} + +Id EmitFPRoundEven16(EmitContext& ctx, Id value) { + return ctx.OpRoundEven(ctx.F16[1], value); +} + +Id EmitFPRoundEven32(EmitContext& ctx, Id value) { + return ctx.OpRoundEven(ctx.F32[1], value); +} + +Id EmitFPRoundEven64(EmitContext& ctx, Id value) { + return ctx.OpRoundEven(ctx.F64[1], value); +} + +Id EmitFPFloor16(EmitContext& ctx, Id value) { + return ctx.OpFloor(ctx.F16[1], value); +} + +Id EmitFPFloor32(EmitContext& ctx, Id value) { + return ctx.OpFloor(ctx.F32[1], value); +} + +Id EmitFPFloor64(EmitContext& ctx, Id value) { + return ctx.OpFloor(ctx.F64[1], value); +} + +Id EmitFPCeil16(EmitContext& ctx, Id value) { + return 
ctx.OpCeil(ctx.F16[1], value); +} + +Id EmitFPCeil32(EmitContext& ctx, Id value) { + return ctx.OpCeil(ctx.F32[1], value); +} + +Id EmitFPCeil64(EmitContext& ctx, Id value) { + return ctx.OpCeil(ctx.F64[1], value); +} + +Id EmitFPTrunc16(EmitContext& ctx, Id value) { + return ctx.OpTrunc(ctx.F16[1], value); +} + +Id EmitFPTrunc32(EmitContext& ctx, Id value) { + return ctx.OpTrunc(ctx.F32[1], value); +} + +Id EmitFPTrunc64(EmitContext& ctx, Id value) { + return ctx.OpTrunc(ctx.F64[1], value); +} + +Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordEqual, ctx, lhs, rhs); +} + +Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordEqual, ctx, lhs, rhs); +} + +Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordEqual, ctx, lhs, rhs); +} + +Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return FPOrdNotEqual(ctx, lhs, rhs); +} + +Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return FPOrdNotEqual(ctx, lhs, rhs); +} + +Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return FPOrdNotEqual(ctx, lhs, rhs); +} + +Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordLessThan, ctx, lhs, rhs); +} + +Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordLessThan, ctx, lhs, rhs); +} + +Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordLessThan, ctx, lhs, rhs); +} + +Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordGreaterThan, ctx, lhs, rhs); +} + +Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordGreaterThan, ctx, lhs, rhs); +} + +Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordGreaterThan, ctx, lhs, rhs); +} + +Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs); +} + +Id 
EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordLessThanEqual, ctx, lhs, rhs); +} + +Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordLessThanEqual, ctx, lhs, rhs); +} + +Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordLessThanEqual, ctx, lhs, rhs); +} + +Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordGreaterThanEqual, ctx, lhs, rhs); +} + +Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordGreaterThanEqual, ctx, lhs, rhs); +} + +Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordGreaterThanEqual, ctx, lhs, rhs); +} + +Id EmitFPIsNan16(EmitContext& ctx, Id value) { + return ctx.OpIsNan(ctx.U1, value); +} + +Id EmitFPIsNan32(EmitContext& ctx, Id value) { + return ctx.OpIsNan(ctx.U1, value); +} + +Id EmitFPIsNan64(EmitContext& ctx, Id value) { + return ctx.OpIsNan(ctx.U1, value); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp new file mode 100644 index 000000000..3588f052b --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -0,0 +1,462 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <boost/container/static_vector.hpp> + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" + +namespace Shader::Backend::SPIRV { +namespace { +class ImageOperands { +public: + explicit ImageOperands(EmitContext& ctx, bool has_bias, bool has_lod, bool has_lod_clamp, + Id lod, const IR::Value& offset) { + if (has_bias) { + const Id bias{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod}; + Add(spv::ImageOperandsMask::Bias, bias); + } + if (has_lod) { + const Id lod_value{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod}; + Add(spv::ImageOperandsMask::Lod, lod_value); + } + AddOffset(ctx, offset); + if (has_lod_clamp) { + const Id lod_clamp{has_bias ? 
ctx.OpCompositeExtract(ctx.F32[1], lod, 1) : lod}; + Add(spv::ImageOperandsMask::MinLod, lod_clamp); + } + } + + explicit ImageOperands(EmitContext& ctx, const IR::Value& offset, const IR::Value& offset2) { + if (offset2.IsEmpty()) { + if (offset.IsEmpty()) { + return; + } + Add(spv::ImageOperandsMask::Offset, ctx.Def(offset)); + return; + } + const std::array values{offset.InstRecursive(), offset2.InstRecursive()}; + if (!values[0]->AreAllArgsImmediates() || !values[1]->AreAllArgsImmediates()) { + LOG_WARNING(Shader_SPIRV, "Not all arguments in PTP are immediate, ignoring"); + return; + } + const IR::Opcode opcode{values[0]->GetOpcode()}; + if (opcode != values[1]->GetOpcode() || opcode != IR::Opcode::CompositeConstructU32x4) { + throw LogicError("Invalid PTP arguments"); + } + auto read{[&](unsigned int a, unsigned int b) { return values[a]->Arg(b).U32(); }}; + + const Id offsets{ctx.ConstantComposite( + ctx.TypeArray(ctx.U32[2], ctx.Const(4U)), ctx.Const(read(0, 0), read(0, 1)), + ctx.Const(read(0, 2), read(0, 3)), ctx.Const(read(1, 0), read(1, 1)), + ctx.Const(read(1, 2), read(1, 3)))}; + Add(spv::ImageOperandsMask::ConstOffsets, offsets); + } + + explicit ImageOperands(Id offset, Id lod, Id ms) { + if (Sirit::ValidId(lod)) { + Add(spv::ImageOperandsMask::Lod, lod); + } + if (Sirit::ValidId(offset)) { + Add(spv::ImageOperandsMask::Offset, offset); + } + if (Sirit::ValidId(ms)) { + Add(spv::ImageOperandsMask::Sample, ms); + } + } + + explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivates, u32 num_derivates, + Id offset, Id lod_clamp) { + if (!Sirit::ValidId(derivates)) { + throw LogicError("Derivates must be present"); + } + boost::container::static_vector<Id, 3> deriv_x_accum; + boost::container::static_vector<Id, 3> deriv_y_accum; + for (u32 i = 0; i < num_derivates; ++i) { + deriv_x_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2)); + deriv_y_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2 + 1)); + } + const Id derivates_X{ctx.OpCompositeConstruct( + ctx.F32[num_derivates], std::span{deriv_x_accum.data(), deriv_x_accum.size()})}; + const Id derivates_Y{ctx.OpCompositeConstruct( + ctx.F32[num_derivates], std::span{deriv_y_accum.data(), deriv_y_accum.size()})}; + Add(spv::ImageOperandsMask::Grad, derivates_X, derivates_Y); + if (Sirit::ValidId(offset)) { + Add(spv::ImageOperandsMask::Offset, offset); + } + if (has_lod_clamp) { + Add(spv::ImageOperandsMask::MinLod, lod_clamp); + } + } + + std::span<const Id> Span() const noexcept { + return std::span{operands.data(), operands.size()}; + } + + std::optional<spv::ImageOperandsMask> MaskOptional() const noexcept { + return mask != spv::ImageOperandsMask{} ? 
std::make_optional(mask) : std::nullopt; + } + + spv::ImageOperandsMask Mask() const noexcept { + return mask; + } + +private: + void AddOffset(EmitContext& ctx, const IR::Value& offset) { + if (offset.IsEmpty()) { + return; + } + if (offset.IsImmediate()) { + Add(spv::ImageOperandsMask::ConstOffset, ctx.SConst(static_cast<s32>(offset.U32()))); + return; + } + IR::Inst* const inst{offset.InstRecursive()}; + if (inst->AreAllArgsImmediates()) { + switch (inst->GetOpcode()) { + case IR::Opcode::CompositeConstructU32x2: + Add(spv::ImageOperandsMask::ConstOffset, + ctx.SConst(static_cast<s32>(inst->Arg(0).U32()), + static_cast<s32>(inst->Arg(1).U32()))); + return; + case IR::Opcode::CompositeConstructU32x3: + Add(spv::ImageOperandsMask::ConstOffset, + ctx.SConst(static_cast<s32>(inst->Arg(0).U32()), + static_cast<s32>(inst->Arg(1).U32()), + static_cast<s32>(inst->Arg(2).U32()))); + return; + case IR::Opcode::CompositeConstructU32x4: + Add(spv::ImageOperandsMask::ConstOffset, + ctx.SConst(static_cast<s32>(inst->Arg(0).U32()), + static_cast<s32>(inst->Arg(1).U32()), + static_cast<s32>(inst->Arg(2).U32()), + static_cast<s32>(inst->Arg(3).U32()))); + return; + default: + break; + } + } + Add(spv::ImageOperandsMask::Offset, ctx.Def(offset)); + } + + void Add(spv::ImageOperandsMask new_mask, Id value) { + mask = static_cast<spv::ImageOperandsMask>(static_cast<unsigned>(mask) | + static_cast<unsigned>(new_mask)); + operands.push_back(value); + } + + void Add(spv::ImageOperandsMask new_mask, Id value_1, Id value_2) { + mask = static_cast<spv::ImageOperandsMask>(static_cast<unsigned>(mask) | + static_cast<unsigned>(new_mask)); + operands.push_back(value_1); + operands.push_back(value_2); + } + + boost::container::static_vector<Id, 4> operands; + spv::ImageOperandsMask mask{}; +}; + +Id Texture(EmitContext& ctx, IR::TextureInstInfo info, [[maybe_unused]] const IR::Value& index) { + const TextureDefinition& def{ctx.textures.at(info.descriptor_index)}; + if (def.count > 1) { + const Id pointer{ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(index))}; + return ctx.OpLoad(def.sampled_type, pointer); + } else { + return ctx.OpLoad(def.sampled_type, def.id); + } +} + +Id TextureImage(EmitContext& ctx, IR::TextureInstInfo info, const IR::Value& index) { + if (!index.IsImmediate() || index.U32() != 0) { + throw NotImplementedException("Indirect image indexing"); + } + if (info.type == TextureType::Buffer) { + const TextureBufferDefinition& def{ctx.texture_buffers.at(info.descriptor_index)}; + if (def.count > 1) { + throw NotImplementedException("Indirect texture sample"); + } + const Id sampler_id{def.id}; + const Id id{ctx.OpLoad(ctx.sampled_texture_buffer_type, sampler_id)}; + return ctx.OpImage(ctx.image_buffer_type, id); + } else { + const TextureDefinition& def{ctx.textures.at(info.descriptor_index)}; + if (def.count > 1) { + throw NotImplementedException("Indirect texture sample"); + } + return ctx.OpImage(def.image_type, ctx.OpLoad(def.sampled_type, def.id)); + } +} + +Id Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) { + if (!index.IsImmediate() || index.U32() != 0) { + throw NotImplementedException("Indirect image indexing"); + } + if (info.type == TextureType::Buffer) { + const ImageBufferDefinition def{ctx.image_buffers.at(info.descriptor_index)}; + return ctx.OpLoad(def.image_type, def.id); + } else { + const ImageDefinition def{ctx.images.at(info.descriptor_index)}; + return ctx.OpLoad(def.image_type, def.id); + } +} + +Id Decorate(EmitContext& ctx, IR::Inst* inst, Id 
sample) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + if (info.relaxed_precision != 0) { + ctx.Decorate(sample, spv::Decoration::RelaxedPrecision); + } + return sample; +} + +template <typename MethodPtrType, typename... Args> +Id Emit(MethodPtrType sparse_ptr, MethodPtrType non_sparse_ptr, EmitContext& ctx, IR::Inst* inst, + Id result_type, Args&&... args) { + IR::Inst* const sparse{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + if (!sparse) { + return Decorate(ctx, inst, (ctx.*non_sparse_ptr)(result_type, std::forward<Args>(args)...)); + } + const Id struct_type{ctx.TypeStruct(ctx.U32[1], result_type)}; + const Id sample{(ctx.*sparse_ptr)(struct_type, std::forward<Args>(args)...)}; + const Id resident_code{ctx.OpCompositeExtract(ctx.U32[1], sample, 0U)}; + sparse->SetDefinition(ctx.OpImageSparseTexelsResident(ctx.U1, resident_code)); + sparse->Invalidate(); + Decorate(ctx, inst, sample); + return ctx.OpCompositeExtract(result_type, sample, 1U); +} +} // Anonymous namespace + +Id EmitBindlessImageSampleImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageSampleExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageGather(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageGatherDref(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageFetch(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageQueryDimensions(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageQueryLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageGradient(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageRead(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageWrite(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageSampleImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageSampleExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageSampleDrefImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageSampleDrefExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageGather(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageGatherDref(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageFetch(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageQueryDimensions(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageQueryLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageGradient(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageRead(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageWrite(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id bias_lc, const IR::Value& 
offset) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + if (ctx.stage == Stage::Fragment) { + const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, + bias_lc, offset); + return Emit(&EmitContext::OpImageSparseSampleImplicitLod, + &EmitContext::OpImageSampleImplicitLod, ctx, inst, ctx.F32[4], + Texture(ctx, info, index), coords, operands.MaskOptional(), operands.Span()); + } else { + // We can't use implicit lods on non-fragment stages on SPIR-V. Maxwell hardware behaves as + // if the lod was explicitly zero. This may change on Turing with implicit compute + // derivatives + const Id lod{ctx.Const(0.0f)}; + const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod, offset); + return Emit(&EmitContext::OpImageSparseSampleExplicitLod, + &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], + Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); + } +} + +Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id lod, const IR::Value& offset) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + const ImageOperands operands(ctx, false, true, false, lod, offset); + return Emit(&EmitContext::OpImageSparseSampleExplicitLod, + &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], + Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); +} + +Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id bias_lc, const IR::Value& offset) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, bias_lc, + offset); + return Emit(&EmitContext::OpImageSparseSampleDrefImplicitLod, + &EmitContext::OpImageSampleDrefImplicitLod, ctx, inst, ctx.F32[1], + Texture(ctx, info, index), coords, dref, operands.MaskOptional(), operands.Span()); +} + +Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id lod, const IR::Value& offset) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + const ImageOperands operands(ctx, false, true, false, lod, offset); + return Emit(&EmitContext::OpImageSparseSampleDrefExplicitLod, + &EmitContext::OpImageSampleDrefExplicitLod, ctx, inst, ctx.F32[1], + Texture(ctx, info, index), coords, dref, operands.Mask(), operands.Span()); +} + +Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + const IR::Value& offset, const IR::Value& offset2) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + const ImageOperands operands(ctx, offset, offset2); + return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, + ctx.F32[4], Texture(ctx, info, index), coords, ctx.Const(info.gather_component), + operands.MaskOptional(), operands.Span()); +} + +Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + const IR::Value& offset, const IR::Value& offset2, Id dref) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + const ImageOperands operands(ctx, offset, offset2); + return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst, + ctx.F32[4], Texture(ctx, info, index), coords, dref, operands.MaskOptional(), + operands.Span()); +} + +Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, + Id lod, Id ms) { + const auto 
info{inst->Flags<IR::TextureInstInfo>()}; + if (info.type == TextureType::Buffer) { + lod = Id{}; + } + const ImageOperands operands(offset, lod, ms); + return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4], + TextureImage(ctx, info, index), coords, operands.MaskOptional(), operands.Span()); +} + +Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + const Id image{TextureImage(ctx, info, index)}; + const Id zero{ctx.u32_zero_value}; + const auto mips{[&] { return ctx.OpImageQueryLevels(ctx.U32[1], image); }}; + switch (info.type) { + case TextureType::Color1D: + return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[1], image, lod), + zero, zero, mips()); + case TextureType::ColorArray1D: + case TextureType::Color2D: + case TextureType::ColorCube: + return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[2], image, lod), + zero, mips()); + case TextureType::ColorArray2D: + case TextureType::Color3D: + case TextureType::ColorArrayCube: + return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[3], image, lod), + mips()); + case TextureType::Buffer: + return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySize(ctx.U32[1], image), zero, + zero, mips()); + } + throw LogicError("Unspecified image type {}", info.type.Value()); +} + +Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + const Id zero{ctx.f32_zero_value}; + const Id sampler{Texture(ctx, info, index)}; + return ctx.OpCompositeConstruct(ctx.F32[4], ctx.OpImageQueryLod(ctx.F32[2], sampler, coords), + zero, zero); +} + +Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id derivates, Id offset, Id lod_clamp) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + const ImageOperands operands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates, + offset, lod_clamp); + return Emit(&EmitContext::OpImageSparseSampleExplicitLod, + &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], + Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); +} + +Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + if (info.image_format == ImageFormat::Typeless && !ctx.profile.support_typeless_image_loads) { + LOG_WARNING(Shader_SPIRV, "Typeless image read not supported by host"); + return ctx.ConstantNull(ctx.U32[4]); + } + return Emit(&EmitContext::OpImageSparseRead, &EmitContext::OpImageRead, ctx, inst, ctx.U32[4], + Image(ctx, index, info), coords, std::nullopt, std::span<const Id>{}); +} + +void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + ctx.OpImageWrite(Image(ctx, index, info), coords, color); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp new file mode 100644 index 000000000..d7f1a365a --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp @@ -0,0 +1,183 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
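+ +// 32-bit image atomics are implemented with OpImageTexelPointer plus the matching OpAtomic* +// instruction; Inc/Dec and the bindless/bound variants below are unimplemented and throw.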
+ +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" + +namespace Shader::Backend::SPIRV { +namespace { +Id Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) { + if (!index.IsImmediate()) { + throw NotImplementedException("Indirect image indexing"); + } + if (info.type == TextureType::Buffer) { + const ImageBufferDefinition def{ctx.image_buffers.at(index.U32())}; + return def.id; + } else { + const ImageDefinition def{ctx.images.at(index.U32())}; + return def.id; + } +} + +std::pair<Id, Id> AtomicArgs(EmitContext& ctx) { + const Id scope{ctx.Const(static_cast<u32>(spv::Scope::Device))}; + const Id semantics{ctx.u32_zero_value}; + return {scope, semantics}; +} + +Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id value, + Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + const Id image{Image(ctx, index, info)}; + const Id pointer{ctx.OpImageTexelPointer(ctx.image_u32, image, coords, ctx.Const(0U))}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value); +} +} // Anonymous namespace + +Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value) { + return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicIAdd); +} + +Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value) { + return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicSMin); +} + +Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value) { + return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicUMin); +} + +Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value) { + return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicSMax); +} + +Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value) { + return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicUMax); +} + +Id EmitImageAtomicInc32(EmitContext&, IR::Inst*, const IR::Value&, Id, Id) { + // TODO: This is not yet implemented + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitImageAtomicDec32(EmitContext&, IR::Inst*, const IR::Value&, Id, Id) { + // TODO: This is not yet implemented + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value) { + return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicAnd); +} + +Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value) { + return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicOr); +} + +Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value) { + return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicXor); +} + +Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value) { + return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicExchange); +} + +Id EmitBindlessImageAtomicIAdd32(EmitContext&) { + throw 
NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicSMin32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicUMin32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicSMax32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicUMax32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicInc32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicDec32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicAnd32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicOr32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicXor32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicExchange32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicIAdd32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicSMin32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicUMin32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicSMax32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicUMax32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicInc32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicDec32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicAnd32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicOr32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicXor32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicExchange32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h new file mode 100644 index 000000000..f99c02848 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -0,0 +1,579 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
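+ +#pragma once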
+ +#include <sirit/sirit.h> + +#include "common/common_types.h" + +namespace Shader::IR { +enum class Attribute : u64; +enum class Patch : u64; +class Inst; +class Value; +} // namespace Shader::IR + +namespace Shader::Backend::SPIRV { + +using Sirit::Id; + +class EmitContext; + +// Microinstruction emitters +Id EmitPhi(EmitContext& ctx, IR::Inst* inst); +void EmitVoid(EmitContext& ctx); +Id EmitIdentity(EmitContext& ctx, const IR::Value& value); +Id EmitConditionRef(EmitContext& ctx, const IR::Value& value); +void EmitReference(EmitContext&); +void EmitPhiMove(EmitContext&); +void EmitJoin(EmitContext& ctx); +void EmitDemoteToHelperInvocation(EmitContext& ctx); +void EmitBarrier(EmitContext& ctx); +void EmitWorkgroupMemoryBarrier(EmitContext& ctx); +void EmitDeviceMemoryBarrier(EmitContext& ctx); +void EmitPrologue(EmitContext& ctx); +void EmitEpilogue(EmitContext& ctx); +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream); +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); +void EmitGetRegister(EmitContext& ctx); +void EmitSetRegister(EmitContext& ctx); +void EmitGetPred(EmitContext& ctx); +void EmitSetPred(EmitContext& ctx); +void EmitSetGotoVariable(EmitContext& ctx); +void EmitGetGotoVariable(EmitContext& ctx); +void EmitSetIndirectBranchVariable(EmitContext& ctx); +void EmitGetIndirectBranchVariable(EmitContext& ctx); +Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex); +Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex); +void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex); +Id EmitGetPatch(EmitContext& ctx, IR::Patch patch); +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value); +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); +void EmitSetSampleMask(EmitContext& ctx, Id value); +void EmitSetFragDepth(EmitContext& ctx, Id value); +void EmitGetZFlag(EmitContext& ctx); +void EmitGetSFlag(EmitContext& ctx); +void EmitGetCFlag(EmitContext& ctx); +void EmitGetOFlag(EmitContext& ctx); +void EmitSetZFlag(EmitContext& ctx); +void EmitSetSFlag(EmitContext& ctx); +void EmitSetCFlag(EmitContext& ctx); +void EmitSetOFlag(EmitContext& ctx); +Id EmitWorkgroupId(EmitContext& ctx); +Id EmitLocalInvocationId(EmitContext& ctx); +Id EmitInvocationId(EmitContext& ctx); +Id EmitSampleId(EmitContext& ctx); +Id EmitIsHelperInvocation(EmitContext& ctx); +Id EmitYDirection(EmitContext& ctx); +Id EmitLoadLocal(EmitContext& ctx, Id word_offset); +void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); +Id EmitUndefU1(EmitContext& ctx); +Id EmitUndefU8(EmitContext& ctx); +Id EmitUndefU16(EmitContext& ctx); +Id EmitUndefU32(EmitContext& ctx); +Id EmitUndefU64(EmitContext& ctx); +void EmitLoadGlobalU8(EmitContext& ctx); +void EmitLoadGlobalS8(EmitContext& ctx); +void 
EmitLoadGlobalU16(EmitContext& ctx); +void EmitLoadGlobalS16(EmitContext& ctx); +Id EmitLoadGlobal32(EmitContext& ctx, Id address); +Id EmitLoadGlobal64(EmitContext& ctx, Id address); +Id EmitLoadGlobal128(EmitContext& ctx, Id address); +void EmitWriteGlobalU8(EmitContext& ctx); +void EmitWriteGlobalS8(EmitContext& ctx); +void EmitWriteGlobalU16(EmitContext& ctx); +void EmitWriteGlobalS16(EmitContext& ctx); +void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value); +void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value); +void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value); +Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitLoadSharedU8(EmitContext& ctx, Id offset); +Id EmitLoadSharedS8(EmitContext& ctx, Id offset); +Id EmitLoadSharedU16(EmitContext& ctx, Id offset); +Id EmitLoadSharedS16(EmitContext& ctx, Id offset); +Id EmitLoadSharedU32(EmitContext& ctx, Id offset); +Id EmitLoadSharedU64(EmitContext& ctx, Id offset); +Id EmitLoadSharedU128(EmitContext& ctx, Id offset); +void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value); +Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); +Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3); +Id 
EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3); +Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index); +void EmitCompositeConstructF64x2(EmitContext& ctx); +void EmitCompositeConstructF64x3(EmitContext& ctx); +void EmitCompositeConstructF64x4(EmitContext& ctx); +void EmitCompositeExtractF64x2(EmitContext& ctx); +void EmitCompositeExtractF64x3(EmitContext& ctx); +void EmitCompositeExtractF64x4(EmitContext& ctx); +Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value); +void EmitBitCastU16F16(EmitContext& ctx); +Id EmitBitCastU32F32(EmitContext& ctx, Id value); +void EmitBitCastU64F64(EmitContext& ctx); +void EmitBitCastF16U16(EmitContext& ctx); +Id EmitBitCastF32U32(EmitContext& ctx, Id value); +void EmitBitCastF64U64(EmitContext& ctx); +Id EmitPackUint2x32(EmitContext& ctx, Id value); +Id EmitUnpackUint2x32(EmitContext& ctx, Id value); +Id EmitPackFloat2x16(EmitContext& ctx, Id value); +Id EmitUnpackFloat2x16(EmitContext& ctx, Id value); +Id EmitPackHalf2x16(EmitContext& ctx, Id value); +Id EmitUnpackHalf2x16(EmitContext& ctx, Id value); +Id EmitPackDouble2x32(EmitContext& ctx, Id value); +Id EmitUnpackDouble2x32(EmitContext& ctx, Id value); +void EmitGetZeroFromOp(EmitContext& ctx); +void EmitGetSignFromOp(EmitContext& ctx); +void EmitGetCarryFromOp(EmitContext& ctx); +void EmitGetOverflowFromOp(EmitContext& ctx); +void EmitGetSparseFromOp(EmitContext& ctx); +void EmitGetInBoundsFromOp(EmitContext& ctx); +Id EmitFPAbs16(EmitContext& ctx, Id value); +Id EmitFPAbs32(EmitContext& ctx, Id value); +Id EmitFPAbs64(EmitContext& ctx, Id value); +Id EmitFPAdd16(EmitContext& ctx, IR::Inst* 
inst, Id a, Id b); +Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPMax32(EmitContext& ctx, Id a, Id b); +Id EmitFPMax64(EmitContext& ctx, Id a, Id b); +Id EmitFPMin32(EmitContext& ctx, Id a, Id b); +Id EmitFPMin64(EmitContext& ctx, Id a, Id b); +Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPNeg16(EmitContext& ctx, Id value); +Id EmitFPNeg32(EmitContext& ctx, Id value); +Id EmitFPNeg64(EmitContext& ctx, Id value); +Id EmitFPSin(EmitContext& ctx, Id value); +Id EmitFPCos(EmitContext& ctx, Id value); +Id EmitFPExp2(EmitContext& ctx, Id value); +Id EmitFPLog2(EmitContext& ctx, Id value); +Id EmitFPRecip32(EmitContext& ctx, Id value); +Id EmitFPRecip64(EmitContext& ctx, Id value); +Id EmitFPRecipSqrt32(EmitContext& ctx, Id value); +Id EmitFPRecipSqrt64(EmitContext& ctx, Id value); +Id EmitFPSqrt(EmitContext& ctx, Id value); +Id EmitFPSaturate16(EmitContext& ctx, Id value); +Id EmitFPSaturate32(EmitContext& ctx, Id value); +Id EmitFPSaturate64(EmitContext& ctx, Id value); +Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value); +Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value); +Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value); +Id EmitFPRoundEven16(EmitContext& ctx, Id value); +Id EmitFPRoundEven32(EmitContext& ctx, Id value); +Id EmitFPRoundEven64(EmitContext& ctx, Id value); +Id EmitFPFloor16(EmitContext& ctx, Id value); +Id EmitFPFloor32(EmitContext& ctx, Id value); +Id EmitFPFloor64(EmitContext& ctx, Id value); +Id EmitFPCeil16(EmitContext& ctx, Id value); +Id EmitFPCeil32(EmitContext& ctx, Id value); +Id EmitFPCeil64(EmitContext& ctx, Id value); +Id EmitFPTrunc16(EmitContext& ctx, Id value); +Id EmitFPTrunc32(EmitContext& ctx, Id value); +Id EmitFPTrunc64(EmitContext& ctx, Id value); +Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); +Id 
EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPIsNan16(EmitContext& ctx, Id value); +Id EmitFPIsNan32(EmitContext& ctx, Id value); +Id EmitFPIsNan64(EmitContext& ctx, Id value); +Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitIAdd64(EmitContext& ctx, Id a, Id b); +Id EmitISub32(EmitContext& ctx, Id a, Id b); +Id EmitISub64(EmitContext& ctx, Id a, Id b); +Id EmitIMul32(EmitContext& ctx, Id a, Id b); +Id EmitINeg32(EmitContext& ctx, Id value); +Id EmitINeg64(EmitContext& ctx, Id value); +Id EmitIAbs32(EmitContext& ctx, Id value); +Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); +Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift); +Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift); +Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift); +Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift); +Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift); +Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count); +Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); +Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); +Id EmitBitReverse32(EmitContext& ctx, Id value); +Id EmitBitCount32(EmitContext& ctx, Id value); +Id EmitBitwiseNot32(EmitContext& ctx, Id value); +Id EmitFindSMsb32(EmitContext& ctx, Id value); +Id EmitFindUMsb32(EmitContext& ctx, Id value); +Id EmitSMin32(EmitContext& ctx, Id a, Id b); +Id EmitUMin32(EmitContext& ctx, Id a, Id b); +Id EmitSMax32(EmitContext& ctx, Id a, Id b); +Id EmitUMax32(EmitContext& ctx, Id a, Id b); +Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); +Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); +Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value); +Id 
EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const 
IR::Value& offset, + Id value); +Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitGlobalAtomicIAdd32(EmitContext& ctx); +Id EmitGlobalAtomicSMin32(EmitContext& ctx); +Id EmitGlobalAtomicUMin32(EmitContext& ctx); +Id EmitGlobalAtomicSMax32(EmitContext& ctx); +Id EmitGlobalAtomicUMax32(EmitContext& ctx); +Id EmitGlobalAtomicInc32(EmitContext& ctx); +Id EmitGlobalAtomicDec32(EmitContext& ctx); +Id EmitGlobalAtomicAnd32(EmitContext& ctx); +Id EmitGlobalAtomicOr32(EmitContext& ctx); +Id EmitGlobalAtomicXor32(EmitContext& ctx); +Id EmitGlobalAtomicExchange32(EmitContext& ctx); +Id EmitGlobalAtomicIAdd64(EmitContext& ctx); +Id EmitGlobalAtomicSMin64(EmitContext& ctx); +Id EmitGlobalAtomicUMin64(EmitContext& ctx); +Id EmitGlobalAtomicSMax64(EmitContext& ctx); +Id EmitGlobalAtomicUMax64(EmitContext& ctx); +Id EmitGlobalAtomicInc64(EmitContext& ctx); +Id EmitGlobalAtomicDec64(EmitContext& ctx); +Id EmitGlobalAtomicAnd64(EmitContext& ctx); +Id EmitGlobalAtomicOr64(EmitContext& ctx); +Id EmitGlobalAtomicXor64(EmitContext& ctx); +Id EmitGlobalAtomicExchange64(EmitContext& ctx); +Id EmitGlobalAtomicAddF32(EmitContext& ctx); +Id EmitGlobalAtomicAddF16x2(EmitContext& ctx); +Id EmitGlobalAtomicAddF32x2(EmitContext& ctx); +Id EmitGlobalAtomicMinF16x2(EmitContext& ctx); +Id EmitGlobalAtomicMinF32x2(EmitContext& ctx); +Id EmitGlobalAtomicMaxF16x2(EmitContext& ctx); +Id EmitGlobalAtomicMaxF32x2(EmitContext& ctx); +Id EmitLogicalOr(EmitContext& ctx, Id a, Id b); +Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b); +Id EmitLogicalXor(EmitContext& ctx, Id a, Id b); +Id EmitLogicalNot(EmitContext& ctx, Id value); +Id EmitConvertS16F16(EmitContext& ctx, Id value); +Id EmitConvertS16F32(EmitContext& ctx, Id value); +Id EmitConvertS16F64(EmitContext& ctx, Id value); +Id EmitConvertS32F16(EmitContext& ctx, Id value); +Id EmitConvertS32F32(EmitContext& ctx, Id value); +Id EmitConvertS32F64(EmitContext& ctx, Id value); +Id EmitConvertS64F16(EmitContext& ctx, Id value); +Id EmitConvertS64F32(EmitContext& ctx, Id value); +Id EmitConvertS64F64(EmitContext& ctx, Id value); +Id EmitConvertU16F16(EmitContext& ctx, Id value); +Id EmitConvertU16F32(EmitContext& ctx, Id value); +Id EmitConvertU16F64(EmitContext& ctx, Id value); +Id EmitConvertU32F16(EmitContext& ctx, Id value); +Id EmitConvertU32F32(EmitContext& ctx, Id value); +Id EmitConvertU32F64(EmitContext& ctx, Id value); +Id EmitConvertU64F16(EmitContext& ctx, Id value); +Id EmitConvertU64F32(EmitContext& ctx, Id value); +Id EmitConvertU64F64(EmitContext& ctx, Id value); +Id EmitConvertU64U32(EmitContext& ctx, Id value); +Id EmitConvertU32U64(EmitContext& ctx, Id value); +Id EmitConvertF16F32(EmitContext& ctx, Id value); +Id EmitConvertF32F16(EmitContext& ctx, Id value); +Id EmitConvertF32F64(EmitContext& ctx, Id value); +Id EmitConvertF64F32(EmitContext& ctx, Id value); +Id EmitConvertF16S8(EmitContext& ctx, Id value); +Id EmitConvertF16S16(EmitContext& ctx, Id value); +Id EmitConvertF16S32(EmitContext& ctx, Id value); +Id EmitConvertF16S64(EmitContext& ctx, Id value); +Id EmitConvertF16U8(EmitContext& ctx, Id value); +Id EmitConvertF16U16(EmitContext& ctx, Id value); +Id EmitConvertF16U32(EmitContext& ctx, Id value); +Id EmitConvertF16U64(EmitContext& ctx, Id value); +Id EmitConvertF32S8(EmitContext& ctx, Id value); +Id EmitConvertF32S16(EmitContext& ctx, Id value); +Id 
EmitConvertF32S32(EmitContext& ctx, Id value); +Id EmitConvertF32S64(EmitContext& ctx, Id value); +Id EmitConvertF32U8(EmitContext& ctx, Id value); +Id EmitConvertF32U16(EmitContext& ctx, Id value); +Id EmitConvertF32U32(EmitContext& ctx, Id value); +Id EmitConvertF32U64(EmitContext& ctx, Id value); +Id EmitConvertF64S8(EmitContext& ctx, Id value); +Id EmitConvertF64S16(EmitContext& ctx, Id value); +Id EmitConvertF64S32(EmitContext& ctx, Id value); +Id EmitConvertF64S64(EmitContext& ctx, Id value); +Id EmitConvertF64U8(EmitContext& ctx, Id value); +Id EmitConvertF64U16(EmitContext& ctx, Id value); +Id EmitConvertF64U32(EmitContext& ctx, Id value); +Id EmitConvertF64U64(EmitContext& ctx, Id value); +Id EmitBindlessImageSampleImplicitLod(EmitContext&); +Id EmitBindlessImageSampleExplicitLod(EmitContext&); +Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&); +Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&); +Id EmitBindlessImageGather(EmitContext&); +Id EmitBindlessImageGatherDref(EmitContext&); +Id EmitBindlessImageFetch(EmitContext&); +Id EmitBindlessImageQueryDimensions(EmitContext&); +Id EmitBindlessImageQueryLod(EmitContext&); +Id EmitBindlessImageGradient(EmitContext&); +Id EmitBindlessImageRead(EmitContext&); +Id EmitBindlessImageWrite(EmitContext&); +Id EmitBoundImageSampleImplicitLod(EmitContext&); +Id EmitBoundImageSampleExplicitLod(EmitContext&); +Id EmitBoundImageSampleDrefImplicitLod(EmitContext&); +Id EmitBoundImageSampleDrefExplicitLod(EmitContext&); +Id EmitBoundImageGather(EmitContext&); +Id EmitBoundImageGatherDref(EmitContext&); +Id EmitBoundImageFetch(EmitContext&); +Id EmitBoundImageQueryDimensions(EmitContext&); +Id EmitBoundImageQueryLod(EmitContext&); +Id EmitBoundImageGradient(EmitContext&); +Id EmitBoundImageRead(EmitContext&); +Id EmitBoundImageWrite(EmitContext&); +Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id bias_lc, const IR::Value& offset); +Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id lod, const IR::Value& offset); +Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id bias_lc, const IR::Value& offset); +Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id lod, const IR::Value& offset); +Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + const IR::Value& offset, const IR::Value& offset2); +Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + const IR::Value& offset, const IR::Value& offset2, Id dref); +Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, + Id lod, Id ms); +Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod); +Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); +Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id derivates, Id offset, Id lod_clamp); +Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); +void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color); +Id EmitBindlessImageAtomicIAdd32(EmitContext&); +Id EmitBindlessImageAtomicSMin32(EmitContext&); +Id EmitBindlessImageAtomicUMin32(EmitContext&); +Id EmitBindlessImageAtomicSMax32(EmitContext&); +Id 
EmitBindlessImageAtomicUMax32(EmitContext&); +Id EmitBindlessImageAtomicInc32(EmitContext&); +Id EmitBindlessImageAtomicDec32(EmitContext&); +Id EmitBindlessImageAtomicAnd32(EmitContext&); +Id EmitBindlessImageAtomicOr32(EmitContext&); +Id EmitBindlessImageAtomicXor32(EmitContext&); +Id EmitBindlessImageAtomicExchange32(EmitContext&); +Id EmitBoundImageAtomicIAdd32(EmitContext&); +Id EmitBoundImageAtomicSMin32(EmitContext&); +Id EmitBoundImageAtomicUMin32(EmitContext&); +Id EmitBoundImageAtomicSMax32(EmitContext&); +Id EmitBoundImageAtomicUMax32(EmitContext&); +Id EmitBoundImageAtomicInc32(EmitContext&); +Id EmitBoundImageAtomicDec32(EmitContext&); +Id EmitBoundImageAtomicAnd32(EmitContext&); +Id EmitBoundImageAtomicOr32(EmitContext&); +Id EmitBoundImageAtomicXor32(EmitContext&); +Id EmitBoundImageAtomicExchange32(EmitContext&); +Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitLaneId(EmitContext& ctx); +Id EmitVoteAll(EmitContext& ctx, Id pred); +Id EmitVoteAny(EmitContext& ctx, Id pred); +Id EmitVoteEqual(EmitContext& ctx, Id pred); +Id EmitSubgroupBallot(EmitContext& ctx, Id pred); +Id EmitSubgroupEqMask(EmitContext& ctx); +Id EmitSubgroupLtMask(EmitContext& ctx); +Id EmitSubgroupLeMask(EmitContext& ctx); +Id EmitSubgroupGtMask(EmitContext& ctx); +Id EmitSubgroupGeMask(EmitContext& ctx); +Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle); +Id EmitDPdxFine(EmitContext& ctx, Id op_a); +Id EmitDPdyFine(EmitContext& ctx, Id op_a); +Id EmitDPdxCoarse(EmitContext& ctx, Id op_a); +Id EmitDPdyCoarse(EmitContext& ctx, Id op_a); + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp new file mode 100644 index 000000000..3501d7495 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -0,0 +1,270 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version 
+// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { +namespace { +void SetZeroFlag(EmitContext& ctx, IR::Inst* inst, Id result) { + IR::Inst* const zero{inst->GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)}; + if (!zero) { + return; + } + zero->SetDefinition(ctx.OpIEqual(ctx.U1, result, ctx.u32_zero_value)); + zero->Invalidate(); +} + +void SetSignFlag(EmitContext& ctx, IR::Inst* inst, Id result) { + IR::Inst* const sign{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)}; + if (!sign) { + return; + } + sign->SetDefinition(ctx.OpSLessThan(ctx.U1, result, ctx.u32_zero_value)); + sign->Invalidate(); +} +} // Anonymous namespace + +Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + Id result{}; + if (IR::Inst* const carry{inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp)}) { + const Id carry_type{ctx.TypeStruct(ctx.U32[1], ctx.U32[1])}; + const Id carry_result{ctx.OpIAddCarry(carry_type, a, b)}; + result = ctx.OpCompositeExtract(ctx.U32[1], carry_result, 0U); + + const Id carry_value{ctx.OpCompositeExtract(ctx.U32[1], carry_result, 1U)}; + carry->SetDefinition(ctx.OpINotEqual(ctx.U1, carry_value, ctx.u32_zero_value)); + carry->Invalidate(); + } else { + result = ctx.OpIAdd(ctx.U32[1], a, b); + } + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + if (IR::Inst * overflow{inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)}) { + // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c + constexpr u32 s32_max{static_cast<u32>(std::numeric_limits<s32>::max())}; + const Id is_positive{ctx.OpSGreaterThanEqual(ctx.U1, a, ctx.u32_zero_value)}; + const Id sub_a{ctx.OpISub(ctx.U32[1], ctx.Const(s32_max), a)}; + + const Id positive_test{ctx.OpSGreaterThan(ctx.U1, b, sub_a)}; + const Id negative_test{ctx.OpSLessThan(ctx.U1, b, sub_a)}; + const Id carry_flag{ctx.OpSelect(ctx.U1, is_positive, positive_test, negative_test)}; + overflow->SetDefinition(carry_flag); + overflow->Invalidate(); + } + return result; +} + +Id EmitIAdd64(EmitContext& ctx, Id a, Id b) { + return ctx.OpIAdd(ctx.U64, a, b); +} + +Id EmitISub32(EmitContext& ctx, Id a, Id b) { + return ctx.OpISub(ctx.U32[1], a, b); +} + +Id EmitISub64(EmitContext& ctx, Id a, Id b) { + return ctx.OpISub(ctx.U64, a, b); +} + +Id EmitIMul32(EmitContext& ctx, Id a, Id b) { + return ctx.OpIMul(ctx.U32[1], a, b); +} + +Id EmitINeg32(EmitContext& ctx, Id value) { + return ctx.OpSNegate(ctx.U32[1], value); +} + +Id EmitINeg64(EmitContext& ctx, Id value) { + return ctx.OpSNegate(ctx.U64, value); +} + +Id EmitIAbs32(EmitContext& ctx, Id value) { + return ctx.OpSAbs(ctx.U32[1], value); +} + +Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) { + return ctx.OpShiftLeftLogical(ctx.U32[1], base, shift); +} + +Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift) { + return ctx.OpShiftLeftLogical(ctx.U64, base, shift); +} + +Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift) { + return ctx.OpShiftRightLogical(ctx.U32[1], base, shift); +} + +Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift) { + return ctx.OpShiftRightLogical(ctx.U64, base, shift); +} + +Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift) { + return ctx.OpShiftRightArithmetic(ctx.U32[1], base, shift); +} + +Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift) { + return 
ctx.OpShiftRightArithmetic(ctx.U64, base, shift); +} + +Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + const Id result{ctx.OpBitwiseAnd(ctx.U32[1], a, b)}; + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + return result; +} + +Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + const Id result{ctx.OpBitwiseOr(ctx.U32[1], a, b)}; + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + return result; +} + +Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + const Id result{ctx.OpBitwiseXor(ctx.U32[1], a, b)}; + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + return result; +} + +Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count) { + return ctx.OpBitFieldInsert(ctx.U32[1], base, insert, offset, count); +} + +Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count) { + const Id result{ctx.OpBitFieldSExtract(ctx.U32[1], base, offset, count)}; + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + return result; +} + +Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count) { + const Id result{ctx.OpBitFieldUExtract(ctx.U32[1], base, offset, count)}; + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + return result; +} + +Id EmitBitReverse32(EmitContext& ctx, Id value) { + return ctx.OpBitReverse(ctx.U32[1], value); +} + +Id EmitBitCount32(EmitContext& ctx, Id value) { + return ctx.OpBitCount(ctx.U32[1], value); +} + +Id EmitBitwiseNot32(EmitContext& ctx, Id value) { + return ctx.OpNot(ctx.U32[1], value); +} + +Id EmitFindSMsb32(EmitContext& ctx, Id value) { + return ctx.OpFindSMsb(ctx.U32[1], value); +} + +Id EmitFindUMsb32(EmitContext& ctx, Id value) { + return ctx.OpFindUMsb(ctx.U32[1], value); +} + +Id EmitSMin32(EmitContext& ctx, Id a, Id b) { + const bool is_broken{ctx.profile.has_broken_signed_operations}; + if (is_broken) { + a = ctx.OpBitcast(ctx.S32[1], a); + b = ctx.OpBitcast(ctx.S32[1], b); + } + const Id result{ctx.OpSMin(ctx.U32[1], a, b)}; + return is_broken ? ctx.OpBitcast(ctx.U32[1], result) : result; +} + +Id EmitUMin32(EmitContext& ctx, Id a, Id b) { + return ctx.OpUMin(ctx.U32[1], a, b); +} + +Id EmitSMax32(EmitContext& ctx, Id a, Id b) { + const bool is_broken{ctx.profile.has_broken_signed_operations}; + if (is_broken) { + a = ctx.OpBitcast(ctx.S32[1], a); + b = ctx.OpBitcast(ctx.S32[1], b); + } + const Id result{ctx.OpSMax(ctx.U32[1], a, b)}; + return is_broken ? 
ctx.OpBitcast(ctx.U32[1], result) : result; +} + +Id EmitUMax32(EmitContext& ctx, Id a, Id b) { + return ctx.OpUMax(ctx.U32[1], a, b); +} + +Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max) { + Id result{}; + if (ctx.profile.has_broken_signed_operations || ctx.profile.has_broken_spirv_clamp) { + value = ctx.OpBitcast(ctx.S32[1], value); + min = ctx.OpBitcast(ctx.S32[1], min); + max = ctx.OpBitcast(ctx.S32[1], max); + if (ctx.profile.has_broken_spirv_clamp) { + result = ctx.OpSMax(ctx.S32[1], ctx.OpSMin(ctx.S32[1], value, max), min); + } else { + result = ctx.OpSClamp(ctx.S32[1], value, min, max); + } + result = ctx.OpBitcast(ctx.U32[1], result); + } else { + result = ctx.OpSClamp(ctx.U32[1], value, min, max); + } + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + return result; +} + +Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max) { + Id result{}; + if (ctx.profile.has_broken_spirv_clamp) { + result = ctx.OpUMax(ctx.U32[1], ctx.OpUMin(ctx.U32[1], value, max), min); + } else { + result = ctx.OpUClamp(ctx.U32[1], value, min, max); + } + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + return result; +} + +Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpSLessThan(ctx.U1, lhs, rhs); +} + +Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpULessThan(ctx.U1, lhs, rhs); +} + +Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpIEqual(ctx.U1, lhs, rhs); +} + +Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpSLessThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpULessThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpSGreaterThan(ctx.U1, lhs, rhs); +} + +Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpUGreaterThan(ctx.U1, lhs, rhs); +} + +Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpINotEqual(ctx.U1, lhs, rhs); +} + +Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpSGreaterThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpUGreaterThanEqual(ctx.U1, lhs, rhs); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp new file mode 100644 index 000000000..b9a9500fc --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp @@ -0,0 +1,26 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
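The EmitIAdd32 path above (emit_spirv_integer.cpp) resolves the guest's condition codes through pseudo-operations: GetZeroFromOp, GetSignFromOp, GetCarryFromOp and GetOverflowFromOp are rewritten against the add result instead of being emitted as separate guest instructions. Below is a minimal host-side sketch of what the zero, sign and carry flags mean for a wrapping 32-bit add; it is an illustration only, the struct and function names are not part of the patch, and it assumes two's-complement wrap-around, which is what OpIAdd/OpIAddCarry provide.

#include <cstdint>
#include <cstdio>

struct AddFlags {
    std::uint32_t result;
    bool zero;   // GetZeroFromOp:  result == 0
    bool sign;   // GetSignFromOp:  result negative when read as signed
    bool carry;  // GetCarryFromOp: unsigned wrap-around occurred
};

static AddFlags IAdd32(std::uint32_t a, std::uint32_t b) {
    const std::uint32_t result = a + b;                  // wraps modulo 2^32, like OpIAdd
    return AddFlags{
        .result = result,
        .zero = result == 0,                             // OpIEqual(result, 0)
        .sign = static_cast<std::int32_t>(result) < 0,   // OpSLessThan(result, 0)
        .carry = result < a,                             // second member of OpIAddCarry != 0
    };
}

int main() {
    const AddFlags f = IAdd32(0xFFFFFFFFu, 1u);
    std::printf("result=%u zero=%d sign=%d carry=%d\n", f.result, f.zero, f.sign, f.carry);
    return 0;
}

The overflow pseudo-op is handled separately in the patch, built from signed comparisons against std::numeric_limits<s32>::max() per the linked Stack Overflow approach.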
+ +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { + +Id EmitLogicalOr(EmitContext& ctx, Id a, Id b) { + return ctx.OpLogicalOr(ctx.U1, a, b); +} + +Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b) { + return ctx.OpLogicalAnd(ctx.U1, a, b); +} + +Id EmitLogicalXor(EmitContext& ctx, Id a, Id b) { + return ctx.OpLogicalNotEqual(ctx.U1, a, b); +} + +Id EmitLogicalNot(EmitContext& ctx, Id value) { + return ctx.OpLogicalNot(ctx.U1, value); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp new file mode 100644 index 000000000..679ee2684 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -0,0 +1,275 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <bit> + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { +namespace { +Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size, + u32 index_offset = 0) { + if (offset.IsImmediate()) { + const u32 imm_offset{static_cast<u32>(offset.U32() / element_size) + index_offset}; + return ctx.Const(imm_offset); + } + const u32 shift{static_cast<u32>(std::countr_zero(element_size))}; + Id index{ctx.Def(offset)}; + if (shift != 0) { + const Id shift_id{ctx.Const(shift)}; + index = ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); + } + if (index_offset != 0) { + index = ctx.OpIAdd(ctx.U32[1], index, ctx.Const(index_offset)); + } + return index; +} + +Id StoragePointer(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + const StorageTypeDefinition& type_def, size_t element_size, + Id StorageDefinitions::*member_ptr, u32 index_offset = 0) { + if (!binding.IsImmediate()) { + throw NotImplementedException("Dynamic storage buffer indexing"); + } + const Id ssbo{ctx.ssbos[binding.U32()].*member_ptr}; + const Id index{StorageIndex(ctx, offset, element_size, index_offset)}; + return ctx.OpAccessChain(type_def.element, ssbo, ctx.u32_zero_value, index); +} + +Id LoadStorage(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id result_type, + const StorageTypeDefinition& type_def, size_t element_size, + Id StorageDefinitions::*member_ptr, u32 index_offset = 0) { + const Id pointer{ + StoragePointer(ctx, binding, offset, type_def, element_size, member_ptr, index_offset)}; + return ctx.OpLoad(result_type, pointer); +} + +Id LoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + u32 index_offset = 0) { + return LoadStorage(ctx, binding, offset, ctx.U32[1], ctx.storage_types.U32, sizeof(u32), + &StorageDefinitions::U32, index_offset); +} + +void WriteStorage(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, + const StorageTypeDefinition& type_def, size_t element_size, + Id StorageDefinitions::*member_ptr, u32 index_offset = 0) { + const Id pointer{ + StoragePointer(ctx, binding, offset, type_def, element_size, member_ptr, index_offset)}; + ctx.OpStore(pointer, value); +} + +void WriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, + u32 index_offset = 0) { + WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32, sizeof(u32), + 
&StorageDefinitions::U32, index_offset); +} +} // Anonymous namespace + +void EmitLoadGlobalU8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitLoadGlobalS8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitLoadGlobalU16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitLoadGlobalS16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitLoadGlobal32(EmitContext& ctx, Id address) { + if (ctx.profile.support_int64) { + return ctx.OpFunctionCall(ctx.U32[1], ctx.load_global_func_u32, address); + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); + return ctx.Const(0u); +} + +Id EmitLoadGlobal64(EmitContext& ctx, Id address) { + if (ctx.profile.support_int64) { + return ctx.OpFunctionCall(ctx.U32[2], ctx.load_global_func_u32x2, address); + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); + return ctx.Const(0u, 0u); +} + +Id EmitLoadGlobal128(EmitContext& ctx, Id address) { + if (ctx.profile.support_int64) { + return ctx.OpFunctionCall(ctx.U32[4], ctx.load_global_func_u32x4, address); + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); + return ctx.Const(0u, 0u, 0u, 0u); +} + +void EmitWriteGlobalU8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitWriteGlobalS8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitWriteGlobalU16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitWriteGlobalS16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value) { + if (ctx.profile.support_int64) { + ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32, address, value); + return; + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); +} + +void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value) { + if (ctx.profile.support_int64) { + ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x2, address, value); + return; + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); +} + +void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value) { + if (ctx.profile.support_int64) { + ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x4, address, value); + return; + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); +} + +Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_int8 && ctx.profile.support_descriptor_aliasing) { + return ctx.OpUConvert(ctx.U32[1], + LoadStorage(ctx, binding, offset, ctx.U8, ctx.storage_types.U8, + sizeof(u8), &StorageDefinitions::U8)); + } else { + return ctx.OpBitFieldUExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset), + ctx.BitOffset8(offset), ctx.Const(8u)); + } +} + +Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_int8 && ctx.profile.support_descriptor_aliasing) { + return ctx.OpSConvert(ctx.U32[1], + LoadStorage(ctx, binding, offset, ctx.S8, ctx.storage_types.S8, + sizeof(s8), &StorageDefinitions::S8)); + } else { + return ctx.OpBitFieldSExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset), + ctx.BitOffset8(offset), ctx.Const(8u)); + } +} + +Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& 
offset) { + if (ctx.profile.support_int16 && ctx.profile.support_descriptor_aliasing) { + return ctx.OpUConvert(ctx.U32[1], + LoadStorage(ctx, binding, offset, ctx.U16, ctx.storage_types.U16, + sizeof(u16), &StorageDefinitions::U16)); + } else { + return ctx.OpBitFieldUExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset), + ctx.BitOffset16(offset), ctx.Const(16u)); + } +} + +Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_int16 && ctx.profile.support_descriptor_aliasing) { + return ctx.OpSConvert(ctx.U32[1], + LoadStorage(ctx, binding, offset, ctx.S16, ctx.storage_types.S16, + sizeof(s16), &StorageDefinitions::S16)); + } else { + return ctx.OpBitFieldSExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset), + ctx.BitOffset16(offset), ctx.Const(16u)); + } +} + +Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return LoadStorage32(ctx, binding, offset); +} + +Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_descriptor_aliasing) { + return LoadStorage(ctx, binding, offset, ctx.U32[2], ctx.storage_types.U32x2, + sizeof(u32[2]), &StorageDefinitions::U32x2); + } else { + return ctx.OpCompositeConstruct(ctx.U32[2], LoadStorage32(ctx, binding, offset, 0), + LoadStorage32(ctx, binding, offset, 1)); + } +} + +Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_descriptor_aliasing) { + return LoadStorage(ctx, binding, offset, ctx.U32[4], ctx.storage_types.U32x4, + sizeof(u32[4]), &StorageDefinitions::U32x4); + } else { + return ctx.OpCompositeConstruct(ctx.U32[4], LoadStorage32(ctx, binding, offset, 0), + LoadStorage32(ctx, binding, offset, 1), + LoadStorage32(ctx, binding, offset, 2), + LoadStorage32(ctx, binding, offset, 3)); + } +} + +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U8, value), ctx.storage_types.U8, + sizeof(u8), &StorageDefinitions::U8); +} + +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S8, value), ctx.storage_types.S8, + sizeof(s8), &StorageDefinitions::S8); +} + +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U16, value), ctx.storage_types.U16, + sizeof(u16), &StorageDefinitions::U16); +} + +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S16, value), ctx.storage_types.S16, + sizeof(s16), &StorageDefinitions::S16); +} + +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + WriteStorage32(ctx, binding, offset, value); +} + +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + if (ctx.profile.support_descriptor_aliasing) { + WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32x2, sizeof(u32[2]), + &StorageDefinitions::U32x2); + } else { + for (u32 index = 0; index < 2; ++index) { + const Id element{ctx.OpCompositeExtract(ctx.U32[1], value, index)}; + WriteStorage32(ctx, binding, offset, element, index); + } + } +} + +void 
EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + if (ctx.profile.support_descriptor_aliasing) { + WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32x4, sizeof(u32[4]), + &StorageDefinitions::U32x4); + } else { + for (u32 index = 0; index < 4; ++index) { + const Id element{ctx.OpCompositeExtract(ctx.U32[1], value, index)}; + WriteStorage32(ctx, binding, offset, element, index); + } + } +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp new file mode 100644 index 000000000..c5b4f4720 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp @@ -0,0 +1,42 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { + +Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.U1, cond, true_value, false_value); +} + +Id EmitSelectU8(EmitContext&, Id, Id, Id) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.U16, cond, true_value, false_value); +} + +Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.U32[1], cond, true_value, false_value); +} + +Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.U64, cond, true_value, false_value); +} + +Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.F16[1], cond, true_value, false_value); +} + +Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.F32[1], cond, true_value, false_value); +} + +Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.F64[1], cond, true_value, false_value); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp new file mode 100644 index 000000000..9a79fc7a2 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp @@ -0,0 +1,174 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
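EmitLoadStorageU8/S8 and U16/S16 above take two routes: when support_int8/support_int16 and descriptor aliasing are available they read through a matching 8- or 16-bit SSBO view and convert, otherwise they fetch the containing 32-bit word (LoadStorage32, whose index comes from StorageIndex dividing the byte offset by the element size) and bit-extract the value. The host-side sketch below illustrates that fallback; it assumes ctx.BitOffset8 evaluates to (offset % 4) * 8 (that helper is not shown in this excerpt) and little-endian byte order, and the function names are illustrative only.

#include <cstdint>
#include <cstdio>
#include <vector>

// Fallback byte load from an SSBO that is only visible as an array of u32 words.
static std::uint32_t LoadStorageU8(const std::vector<std::uint32_t>& ssbo, std::uint32_t byte_offset) {
    const std::uint32_t word = ssbo[byte_offset / 4];  // StorageIndex: offset >> countr_zero(sizeof(u32))
    const std::uint32_t bit = (byte_offset % 4) * 8;   // assumed meaning of ctx.BitOffset8(offset)
    return (word >> bit) & 0xFFu;                      // OpBitFieldUExtract(word, bit, 8)
}

static std::int32_t LoadStorageS8(const std::vector<std::uint32_t>& ssbo, std::uint32_t byte_offset) {
    return static_cast<std::int8_t>(LoadStorageU8(ssbo, byte_offset)); // OpBitFieldSExtract sign-extends
}

int main() {
    const std::vector<std::uint32_t> ssbo{0x80402010u};
    std::printf("%u %d\n", LoadStorageU8(ssbo, 3), LoadStorageS8(ssbo, 3)); // prints 128 -128
    return 0;
}

The wide accesses mirror the same decision: EmitLoadStorage64/128 and EmitWriteStorage64/128 either use a u32x2/u32x4 view directly or stitch the vector together from successive LoadStorage32/WriteStorage32 calls with index_offset 0..3.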
+ +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { +namespace { +Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) { + const Id shift_id{ctx.Const(shift)}; + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index); +} + +Id Word(EmitContext& ctx, Id offset) { + const Id shift_id{ctx.Const(2U)}; + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; + return ctx.OpLoad(ctx.U32[1], pointer); +} + +std::pair<Id, Id> ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count) { + const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.Const(3U))}; + const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Const(mask))}; + const Id count_id{ctx.Const(count)}; + return {bit, count_id}; +} +} // Anonymous namespace + +Id EmitLoadSharedU8(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{ + ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; + return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer)); + } else { + const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)}; + return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count); + } +} + +Id EmitLoadSharedS8(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{ + ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; + return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer)); + } else { + const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)}; + return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count); + } +} + +Id EmitLoadSharedU16(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; + return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer)); + } else { + const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)}; + return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count); + } +} + +Id EmitLoadSharedS16(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; + return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer)); + } else { + const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)}; + return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count); + } +} + +Id EmitLoadSharedU32(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2)}; + return ctx.OpLoad(ctx.U32[1], pointer); + } else { + return Word(ctx, offset); + } +} + +Id EmitLoadSharedU64(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)}; + return ctx.OpLoad(ctx.U32[2], pointer); + } else { + const Id shift_id{ctx.Const(2U)}; + const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(1U))}; + const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, 
ctx.shared_memory_u32, base_index)}; + const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)}; + return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer), + ctx.OpLoad(ctx.U32[1], rhs_pointer)); + } +} + +Id EmitLoadSharedU128(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)}; + return ctx.OpLoad(ctx.U32[4], pointer); + } + const Id shift_id{ctx.Const(2U)}; + const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + std::array<Id, 4> values{}; + for (u32 i = 0; i < 4; ++i) { + const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(i))}; + const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; + values[i] = ctx.OpLoad(ctx.U32[1], pointer); + } + return ctx.OpCompositeConstruct(ctx.U32[4], values); +} + +void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{ + ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; + ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value)); + } else { + ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u8_func, offset, value); + } +} + +void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; + ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value)); + } else { + ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u16_func, offset, value); + } +} + +void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) { + Id pointer{}; + if (ctx.profile.support_explicit_workgroup_layout) { + pointer = Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2); + } else { + const Id shift{ctx.Const(2U)}; + const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; + pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset); + } + ctx.OpStore(pointer, value); +} + +void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)}; + ctx.OpStore(pointer, value); + return; + } + const Id shift{ctx.Const(2U)}; + const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; + const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.Const(1U))}; + const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)}; + const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)}; + ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U)); + ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U)); +} + +void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)}; + ctx.OpStore(pointer, value); + return; + } + const Id shift{ctx.Const(2U)}; + const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; + for (u32 i = 0; i < 4; ++i) { + const Id index{i == 0 ? 
base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(i))}; + const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; + ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i)); + } +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp new file mode 100644 index 000000000..9e7eb3cb1 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -0,0 +1,150 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { +namespace { +void ConvertDepthMode(EmitContext& ctx) { + const Id type{ctx.F32[1]}; + const Id position{ctx.OpLoad(ctx.F32[4], ctx.output_position)}; + const Id z{ctx.OpCompositeExtract(type, position, 2u)}; + const Id w{ctx.OpCompositeExtract(type, position, 3u)}; + const Id screen_depth{ctx.OpFMul(type, ctx.OpFAdd(type, z, w), ctx.Constant(type, 0.5f))}; + const Id vector{ctx.OpCompositeInsert(ctx.F32[4], screen_depth, position, 2u)}; + ctx.OpStore(ctx.output_position, vector); +} + +void SetFixedPipelinePointSize(EmitContext& ctx) { + if (ctx.runtime_info.fixed_state_point_size) { + const float point_size{*ctx.runtime_info.fixed_state_point_size}; + ctx.OpStore(ctx.output_point_size, ctx.Const(point_size)); + } +} + +Id DefaultVarying(EmitContext& ctx, u32 num_components, u32 element, Id zero, Id one, + Id default_vector) { + switch (num_components) { + case 1: + return element == 3 ? one : zero; + case 2: + return ctx.ConstantComposite(ctx.F32[2], zero, element + 1 == 3 ? one : zero); + case 3: + return ctx.ConstantComposite(ctx.F32[3], zero, zero, element + 2 == 3 ? 
one : zero); + case 4: + return default_vector; + } + throw InvalidArgument("Bad element"); +} + +Id ComparisonFunction(EmitContext& ctx, CompareFunction comparison, Id operand_1, Id operand_2) { + switch (comparison) { + case CompareFunction::Never: + return ctx.false_value; + case CompareFunction::Less: + return ctx.OpFOrdLessThan(ctx.U1, operand_1, operand_2); + case CompareFunction::Equal: + return ctx.OpFOrdEqual(ctx.U1, operand_1, operand_2); + case CompareFunction::LessThanEqual: + return ctx.OpFOrdLessThanEqual(ctx.U1, operand_1, operand_2); + case CompareFunction::Greater: + return ctx.OpFOrdGreaterThan(ctx.U1, operand_1, operand_2); + case CompareFunction::NotEqual: + return ctx.OpFOrdNotEqual(ctx.U1, operand_1, operand_2); + case CompareFunction::GreaterThanEqual: + return ctx.OpFOrdGreaterThanEqual(ctx.U1, operand_1, operand_2); + case CompareFunction::Always: + return ctx.true_value; + } + throw InvalidArgument("Comparison function {}", comparison); +} + +void AlphaTest(EmitContext& ctx) { + if (!ctx.runtime_info.alpha_test_func) { + return; + } + const auto comparison{*ctx.runtime_info.alpha_test_func}; + if (comparison == CompareFunction::Always) { + return; + } + if (!Sirit::ValidId(ctx.frag_color[0])) { + return; + } + + const Id type{ctx.F32[1]}; + const Id rt0_color{ctx.OpLoad(ctx.F32[4], ctx.frag_color[0])}; + const Id alpha{ctx.OpCompositeExtract(type, rt0_color, 3u)}; + + const Id true_label{ctx.OpLabel()}; + const Id discard_label{ctx.OpLabel()}; + const Id alpha_reference{ctx.Const(ctx.runtime_info.alpha_test_reference)}; + const Id condition{ComparisonFunction(ctx, comparison, alpha, alpha_reference)}; + + ctx.OpSelectionMerge(true_label, spv::SelectionControlMask::MaskNone); + ctx.OpBranchConditional(condition, true_label, discard_label); + ctx.AddLabel(discard_label); + ctx.OpKill(); + ctx.AddLabel(true_label); +} +} // Anonymous namespace + +void EmitPrologue(EmitContext& ctx) { + if (ctx.stage == Stage::VertexB) { + const Id zero{ctx.Const(0.0f)}; + const Id one{ctx.Const(1.0f)}; + const Id default_vector{ctx.ConstantComposite(ctx.F32[4], zero, zero, zero, one)}; + ctx.OpStore(ctx.output_position, default_vector); + for (const auto& info : ctx.output_generics) { + if (info[0].num_components == 0) { + continue; + } + u32 element{0}; + while (element < 4) { + const auto& element_info{info[element]}; + const u32 num{element_info.num_components}; + const Id value{DefaultVarying(ctx, num, element, zero, one, default_vector)}; + ctx.OpStore(element_info.id, value); + element += num; + } + } + } + if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) { + SetFixedPipelinePointSize(ctx); + } +} + +void EmitEpilogue(EmitContext& ctx) { + if (ctx.stage == Stage::VertexB && ctx.runtime_info.convert_depth_mode) { + ConvertDepthMode(ctx); + } + if (ctx.stage == Stage::Fragment) { + AlphaTest(ctx); + } +} + +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { + if (ctx.runtime_info.convert_depth_mode) { + ConvertDepthMode(ctx); + } + if (stream.IsImmediate()) { + ctx.OpEmitStreamVertex(ctx.Def(stream)); + } else { + LOG_WARNING(Shader_SPIRV, "Stream is not immediate"); + ctx.OpEmitStreamVertex(ctx.u32_zero_value); + } + // Restore fixed pipeline point size after emitting the vertex + SetFixedPipelinePointSize(ctx); +} + +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { + if (stream.IsImmediate()) { + ctx.OpEndStreamPrimitive(ctx.Def(stream)); + } else { + LOG_WARNING(Shader_SPIRV, "Stream is not immediate"); + 
ctx.OpEndStreamPrimitive(ctx.u32_zero_value); + } +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp new file mode 100644 index 000000000..c9f469e90 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp @@ -0,0 +1,30 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { + +Id EmitUndefU1(EmitContext& ctx) { + return ctx.OpUndef(ctx.U1); +} + +Id EmitUndefU8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitUndefU16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitUndefU32(EmitContext& ctx) { + return ctx.OpUndef(ctx.U32[1]); +} + +Id EmitUndefU64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp new file mode 100644 index 000000000..78b1e1ba7 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -0,0 +1,203 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { +namespace { +Id WarpExtract(EmitContext& ctx, Id value) { + const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index); +} + +Id LoadMask(EmitContext& ctx, Id mask) { + const Id value{ctx.OpLoad(ctx.U32[4], mask)}; + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpCompositeExtract(ctx.U32[1], value, 0U); + } + return WarpExtract(ctx, value); +} + +void SetInBoundsFlag(IR::Inst* inst, Id result) { + IR::Inst* const in_bounds{inst->GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; + if (!in_bounds) { + return; + } + in_bounds->SetDefinition(result); + in_bounds->Invalidate(); +} + +Id ComputeMinThreadId(EmitContext& ctx, Id thread_id, Id segmentation_mask) { + return ctx.OpBitwiseAnd(ctx.U32[1], thread_id, segmentation_mask); +} + +Id ComputeMaxThreadId(EmitContext& ctx, Id min_thread_id, Id clamp, Id not_seg_mask) { + return ctx.OpBitwiseOr(ctx.U32[1], min_thread_id, + ctx.OpBitwiseAnd(ctx.U32[1], clamp, not_seg_mask)); +} + +Id GetMaxThreadId(EmitContext& ctx, Id thread_id, Id clamp, Id segmentation_mask) { + const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; + const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; + return ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask); +} + +Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) { + return ctx.OpSelect(ctx.U32[1], in_range, + ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value); +} +} // Anonymous namespace + +Id EmitLaneId(EmitContext& ctx) { + const Id id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return id; + } + return ctx.OpBitwiseAnd(ctx.U32[1], id, 
ctx.Const(31U)); +} + +Id EmitVoteAll(EmitContext& ctx, Id pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpSubgroupAllKHR(ctx.U1, pred); + } + const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; + const Id active_mask{WarpExtract(ctx, mask_ballot)}; + const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; + const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; + return ctx.OpIEqual(ctx.U1, lhs, active_mask); +} + +Id EmitVoteAny(EmitContext& ctx, Id pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpSubgroupAnyKHR(ctx.U1, pred); + } + const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; + const Id active_mask{WarpExtract(ctx, mask_ballot)}; + const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; + const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; + return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value); +} + +Id EmitVoteEqual(EmitContext& ctx, Id pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred); + } + const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; + const Id active_mask{WarpExtract(ctx, mask_ballot)}; + const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; + const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)}; + return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value), + ctx.OpIEqual(ctx.U1, lhs, active_mask)); +} + +Id EmitSubgroupBallot(EmitContext& ctx, Id pred) { + const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)}; + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U); + } + return WarpExtract(ctx, ballot); +} + +Id EmitSubgroupEqMask(EmitContext& ctx) { + return LoadMask(ctx, ctx.subgroup_mask_eq); +} + +Id EmitSubgroupLtMask(EmitContext& ctx) { + return LoadMask(ctx, ctx.subgroup_mask_lt); +} + +Id EmitSubgroupLeMask(EmitContext& ctx) { + return LoadMask(ctx, ctx.subgroup_mask_le); +} + +Id EmitSubgroupGtMask(EmitContext& ctx) { + return LoadMask(ctx, ctx.subgroup_mask_gt); +} + +Id EmitSubgroupGeMask(EmitContext& ctx) { + return LoadMask(ctx, ctx.subgroup_mask_ge); +} + +Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask) { + const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; + const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; + const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)}; + + const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)}; + const Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)}; + const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + + SetInBoundsFlag(inst, in_range); + return SelectValue(ctx, in_range, value, src_thread_id); +} + +Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask) { + const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; + const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; + const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + + SetInBoundsFlag(inst, in_range); + return 
SelectValue(ctx, in_range, value, src_thread_id); +} + +Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask) { + const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; + const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; + const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + + SetInBoundsFlag(inst, in_range); + return SelectValue(ctx, in_range, value, src_thread_id); +} + +Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask) { + const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; + const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; + const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + + SetInBoundsFlag(inst, in_range); + return SelectValue(ctx, in_range, value, src_thread_id); +} + +Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle) { + const Id three{ctx.Const(3U)}; + Id mask{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + mask = ctx.OpBitwiseAnd(ctx.U32[1], mask, three); + mask = ctx.OpShiftLeftLogical(ctx.U32[1], mask, ctx.Const(1U)); + mask = ctx.OpShiftRightLogical(ctx.U32[1], swizzle, mask); + mask = ctx.OpBitwiseAnd(ctx.U32[1], mask, three); + + const Id modifier_a{ctx.OpVectorExtractDynamic(ctx.F32[1], ctx.fswzadd_lut_a, mask)}; + const Id modifier_b{ctx.OpVectorExtractDynamic(ctx.F32[1], ctx.fswzadd_lut_b, mask)}; + + const Id result_a{ctx.OpFMul(ctx.F32[1], op_a, modifier_a)}; + const Id result_b{ctx.OpFMul(ctx.F32[1], op_b, modifier_b)}; + return ctx.OpFAdd(ctx.F32[1], result_a, result_b); +} + +Id EmitDPdxFine(EmitContext& ctx, Id op_a) { + return ctx.OpDPdxFine(ctx.F32[1], op_a); +} + +Id EmitDPdyFine(EmitContext& ctx, Id op_a) { + return ctx.OpDPdyFine(ctx.F32[1], op_a); +} + +Id EmitDPdxCoarse(EmitContext& ctx, Id op_a) { + return ctx.OpDPdxCoarse(ctx.F32[1], op_a); +} + +Id EmitDPdyCoarse(EmitContext& ctx, Id op_a) { + return ctx.OpDPdyCoarse(ctx.F32[1], op_a); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h new file mode 100644 index 000000000..8369d0d84 --- /dev/null +++ b/src/shader_recompiler/environment.h @@ -0,0 +1,53 @@ +#pragma once + +#include <array> + +#include "common/common_types.h" +#include "shader_recompiler/program_header.h" +#include "shader_recompiler/shader_info.h" +#include "shader_recompiler/stage.h" + +namespace Shader { + +class Environment { +public: + virtual ~Environment() = default; + + [[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0; + + [[nodiscard]] virtual u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) = 0; + + [[nodiscard]] virtual TextureType ReadTextureType(u32 raw_handle) = 0; + + [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0; + + [[nodiscard]] virtual u32 LocalMemorySize() const = 0; + + [[nodiscard]] virtual u32 SharedMemorySize() const = 0; + + [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0; + + [[nodiscard]] const ProgramHeader& SPH() const noexcept { + return sph; + } + + [[nodiscard]] const std::array<u32, 8>& GpPassthroughMask() const noexcept { + return gp_passthrough_mask; + } + + [[nodiscard]] Stage ShaderStage() const noexcept { + return stage; + } + + 
[[nodiscard]] u32 StartAddress() const noexcept { + return start_address; + } + +protected: + ProgramHeader sph{}; + std::array<u32, 8> gp_passthrough_mask{}; + Stage stage{}; + u32 start_address{}; +}; + +} // namespace Shader diff --git a/src/shader_recompiler/exception.h b/src/shader_recompiler/exception.h new file mode 100644 index 000000000..337e7f0c8 --- /dev/null +++ b/src/shader_recompiler/exception.h @@ -0,0 +1,66 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <stdexcept> +#include <string> +#include <string_view> +#include <utility> + +#include <fmt/format.h> + +namespace Shader { + +class Exception : public std::exception { +public: + explicit Exception(std::string message) noexcept : err_message{std::move(message)} {} + + const char* what() const noexcept override { + return err_message.c_str(); + } + + void Prepend(std::string_view prepend) { + err_message.insert(0, prepend); + } + + void Append(std::string_view append) { + err_message += append; + } + +private: + std::string err_message; +}; + +class LogicError : public Exception { +public: + template <typename... Args> + LogicError(const char* message, Args&&... args) + : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {} +}; + +class RuntimeError : public Exception { +public: + template <typename... Args> + RuntimeError(const char* message, Args&&... args) + : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {} +}; + +class NotImplementedException : public Exception { +public: + template <typename... Args> + NotImplementedException(const char* message, Args&&... args) + : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} { + Append(" is not implemented"); + } +}; + +class InvalidArgument : public Exception { +public: + template <typename... Args> + InvalidArgument(const char* message, Args&&... args) + : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {} +}; + +} // namespace Shader diff --git a/src/shader_recompiler/frontend/ir/abstract_syntax_list.h b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h new file mode 100644 index 000000000..b61773487 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h @@ -0,0 +1,58 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <vector> + +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::IR { + +class Block; + +struct AbstractSyntaxNode { + enum class Type { + Block, + If, + EndIf, + Loop, + Repeat, + Break, + Return, + Unreachable, + }; + union Data { + Block* block; + struct { + U1 cond; + Block* body; + Block* merge; + } if_node; + struct { + Block* merge; + } end_if; + struct { + Block* body; + Block* continue_block; + Block* merge; + } loop; + struct { + U1 cond; + Block* loop_header; + Block* merge; + } repeat; + struct { + U1 cond; + Block* merge; + Block* skip; + } break_node; + }; + + Data data{}; + Type type{}; +}; +using AbstractSyntaxList = std::vector<AbstractSyntaxNode>; + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp new file mode 100644 index 000000000..4d0b8b8e5 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/attribute.cpp @@ -0,0 +1,454 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <fmt/format.h> + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/attribute.h" + +namespace Shader::IR { + +bool IsGeneric(Attribute attribute) noexcept { + return attribute >= Attribute::Generic0X && attribute <= Attribute::Generic31X; +} + +u32 GenericAttributeIndex(Attribute attribute) { + if (!IsGeneric(attribute)) { + throw InvalidArgument("Attribute is not generic {}", attribute); + } + return (static_cast<u32>(attribute) - static_cast<u32>(Attribute::Generic0X)) / 4u; +} + +u32 GenericAttributeElement(Attribute attribute) { + if (!IsGeneric(attribute)) { + throw InvalidArgument("Attribute is not generic {}", attribute); + } + return static_cast<u32>(attribute) % 4; +} + +std::string NameOf(Attribute attribute) { + switch (attribute) { + case Attribute::PrimitiveId: + return "PrimitiveId"; + case Attribute::Layer: + return "Layer"; + case Attribute::ViewportIndex: + return "ViewportIndex"; + case Attribute::PointSize: + return "PointSize"; + case Attribute::PositionX: + return "Position.X"; + case Attribute::PositionY: + return "Position.Y"; + case Attribute::PositionZ: + return "Position.Z"; + case Attribute::PositionW: + return "Position.W"; + case Attribute::Generic0X: + return "Generic[0].X"; + case Attribute::Generic0Y: + return "Generic[0].Y"; + case Attribute::Generic0Z: + return "Generic[0].Z"; + case Attribute::Generic0W: + return "Generic[0].W"; + case Attribute::Generic1X: + return "Generic[1].X"; + case Attribute::Generic1Y: + return "Generic[1].Y"; + case Attribute::Generic1Z: + return "Generic[1].Z"; + case Attribute::Generic1W: + return "Generic[1].W"; + case Attribute::Generic2X: + return "Generic[2].X"; + case Attribute::Generic2Y: + return "Generic[2].Y"; + case Attribute::Generic2Z: + return "Generic[2].Z"; + case Attribute::Generic2W: + return "Generic[2].W"; + case Attribute::Generic3X: + return "Generic[3].X"; + case Attribute::Generic3Y: + return "Generic[3].Y"; + case Attribute::Generic3Z: + return "Generic[3].Z"; + case Attribute::Generic3W: + return "Generic[3].W"; + case Attribute::Generic4X: + return "Generic[4].X"; + case Attribute::Generic4Y: + return "Generic[4].Y"; + case Attribute::Generic4Z: + return "Generic[4].Z"; + case Attribute::Generic4W: + return "Generic[4].W"; + case Attribute::Generic5X: + return "Generic[5].X"; + case Attribute::Generic5Y: + return "Generic[5].Y"; + case Attribute::Generic5Z: 
+ return "Generic[5].Z"; + case Attribute::Generic5W: + return "Generic[5].W"; + case Attribute::Generic6X: + return "Generic[6].X"; + case Attribute::Generic6Y: + return "Generic[6].Y"; + case Attribute::Generic6Z: + return "Generic[6].Z"; + case Attribute::Generic6W: + return "Generic[6].W"; + case Attribute::Generic7X: + return "Generic[7].X"; + case Attribute::Generic7Y: + return "Generic[7].Y"; + case Attribute::Generic7Z: + return "Generic[7].Z"; + case Attribute::Generic7W: + return "Generic[7].W"; + case Attribute::Generic8X: + return "Generic[8].X"; + case Attribute::Generic8Y: + return "Generic[8].Y"; + case Attribute::Generic8Z: + return "Generic[8].Z"; + case Attribute::Generic8W: + return "Generic[8].W"; + case Attribute::Generic9X: + return "Generic[9].X"; + case Attribute::Generic9Y: + return "Generic[9].Y"; + case Attribute::Generic9Z: + return "Generic[9].Z"; + case Attribute::Generic9W: + return "Generic[9].W"; + case Attribute::Generic10X: + return "Generic[10].X"; + case Attribute::Generic10Y: + return "Generic[10].Y"; + case Attribute::Generic10Z: + return "Generic[10].Z"; + case Attribute::Generic10W: + return "Generic[10].W"; + case Attribute::Generic11X: + return "Generic[11].X"; + case Attribute::Generic11Y: + return "Generic[11].Y"; + case Attribute::Generic11Z: + return "Generic[11].Z"; + case Attribute::Generic11W: + return "Generic[11].W"; + case Attribute::Generic12X: + return "Generic[12].X"; + case Attribute::Generic12Y: + return "Generic[12].Y"; + case Attribute::Generic12Z: + return "Generic[12].Z"; + case Attribute::Generic12W: + return "Generic[12].W"; + case Attribute::Generic13X: + return "Generic[13].X"; + case Attribute::Generic13Y: + return "Generic[13].Y"; + case Attribute::Generic13Z: + return "Generic[13].Z"; + case Attribute::Generic13W: + return "Generic[13].W"; + case Attribute::Generic14X: + return "Generic[14].X"; + case Attribute::Generic14Y: + return "Generic[14].Y"; + case Attribute::Generic14Z: + return "Generic[14].Z"; + case Attribute::Generic14W: + return "Generic[14].W"; + case Attribute::Generic15X: + return "Generic[15].X"; + case Attribute::Generic15Y: + return "Generic[15].Y"; + case Attribute::Generic15Z: + return "Generic[15].Z"; + case Attribute::Generic15W: + return "Generic[15].W"; + case Attribute::Generic16X: + return "Generic[16].X"; + case Attribute::Generic16Y: + return "Generic[16].Y"; + case Attribute::Generic16Z: + return "Generic[16].Z"; + case Attribute::Generic16W: + return "Generic[16].W"; + case Attribute::Generic17X: + return "Generic[17].X"; + case Attribute::Generic17Y: + return "Generic[17].Y"; + case Attribute::Generic17Z: + return "Generic[17].Z"; + case Attribute::Generic17W: + return "Generic[17].W"; + case Attribute::Generic18X: + return "Generic[18].X"; + case Attribute::Generic18Y: + return "Generic[18].Y"; + case Attribute::Generic18Z: + return "Generic[18].Z"; + case Attribute::Generic18W: + return "Generic[18].W"; + case Attribute::Generic19X: + return "Generic[19].X"; + case Attribute::Generic19Y: + return "Generic[19].Y"; + case Attribute::Generic19Z: + return "Generic[19].Z"; + case Attribute::Generic19W: + return "Generic[19].W"; + case Attribute::Generic20X: + return "Generic[20].X"; + case Attribute::Generic20Y: + return "Generic[20].Y"; + case Attribute::Generic20Z: + return "Generic[20].Z"; + case Attribute::Generic20W: + return "Generic[20].W"; + case Attribute::Generic21X: + return "Generic[21].X"; + case Attribute::Generic21Y: + return "Generic[21].Y"; + case Attribute::Generic21Z: + 
return "Generic[21].Z"; + case Attribute::Generic21W: + return "Generic[21].W"; + case Attribute::Generic22X: + return "Generic[22].X"; + case Attribute::Generic22Y: + return "Generic[22].Y"; + case Attribute::Generic22Z: + return "Generic[22].Z"; + case Attribute::Generic22W: + return "Generic[22].W"; + case Attribute::Generic23X: + return "Generic[23].X"; + case Attribute::Generic23Y: + return "Generic[23].Y"; + case Attribute::Generic23Z: + return "Generic[23].Z"; + case Attribute::Generic23W: + return "Generic[23].W"; + case Attribute::Generic24X: + return "Generic[24].X"; + case Attribute::Generic24Y: + return "Generic[24].Y"; + case Attribute::Generic24Z: + return "Generic[24].Z"; + case Attribute::Generic24W: + return "Generic[24].W"; + case Attribute::Generic25X: + return "Generic[25].X"; + case Attribute::Generic25Y: + return "Generic[25].Y"; + case Attribute::Generic25Z: + return "Generic[25].Z"; + case Attribute::Generic25W: + return "Generic[25].W"; + case Attribute::Generic26X: + return "Generic[26].X"; + case Attribute::Generic26Y: + return "Generic[26].Y"; + case Attribute::Generic26Z: + return "Generic[26].Z"; + case Attribute::Generic26W: + return "Generic[26].W"; + case Attribute::Generic27X: + return "Generic[27].X"; + case Attribute::Generic27Y: + return "Generic[27].Y"; + case Attribute::Generic27Z: + return "Generic[27].Z"; + case Attribute::Generic27W: + return "Generic[27].W"; + case Attribute::Generic28X: + return "Generic[28].X"; + case Attribute::Generic28Y: + return "Generic[28].Y"; + case Attribute::Generic28Z: + return "Generic[28].Z"; + case Attribute::Generic28W: + return "Generic[28].W"; + case Attribute::Generic29X: + return "Generic[29].X"; + case Attribute::Generic29Y: + return "Generic[29].Y"; + case Attribute::Generic29Z: + return "Generic[29].Z"; + case Attribute::Generic29W: + return "Generic[29].W"; + case Attribute::Generic30X: + return "Generic[30].X"; + case Attribute::Generic30Y: + return "Generic[30].Y"; + case Attribute::Generic30Z: + return "Generic[30].Z"; + case Attribute::Generic30W: + return "Generic[30].W"; + case Attribute::Generic31X: + return "Generic[31].X"; + case Attribute::Generic31Y: + return "Generic[31].Y"; + case Attribute::Generic31Z: + return "Generic[31].Z"; + case Attribute::Generic31W: + return "Generic[31].W"; + case Attribute::ColorFrontDiffuseR: + return "ColorFrontDiffuse.R"; + case Attribute::ColorFrontDiffuseG: + return "ColorFrontDiffuse.G"; + case Attribute::ColorFrontDiffuseB: + return "ColorFrontDiffuse.B"; + case Attribute::ColorFrontDiffuseA: + return "ColorFrontDiffuse.A"; + case Attribute::ColorFrontSpecularR: + return "ColorFrontSpecular.R"; + case Attribute::ColorFrontSpecularG: + return "ColorFrontSpecular.G"; + case Attribute::ColorFrontSpecularB: + return "ColorFrontSpecular.B"; + case Attribute::ColorFrontSpecularA: + return "ColorFrontSpecular.A"; + case Attribute::ColorBackDiffuseR: + return "ColorBackDiffuse.R"; + case Attribute::ColorBackDiffuseG: + return "ColorBackDiffuse.G"; + case Attribute::ColorBackDiffuseB: + return "ColorBackDiffuse.B"; + case Attribute::ColorBackDiffuseA: + return "ColorBackDiffuse.A"; + case Attribute::ColorBackSpecularR: + return "ColorBackSpecular.R"; + case Attribute::ColorBackSpecularG: + return "ColorBackSpecular.G"; + case Attribute::ColorBackSpecularB: + return "ColorBackSpecular.B"; + case Attribute::ColorBackSpecularA: + return "ColorBackSpecular.A"; + case Attribute::ClipDistance0: + return "ClipDistance[0]"; + case Attribute::ClipDistance1: + return 
"ClipDistance[1]"; + case Attribute::ClipDistance2: + return "ClipDistance[2]"; + case Attribute::ClipDistance3: + return "ClipDistance[3]"; + case Attribute::ClipDistance4: + return "ClipDistance[4]"; + case Attribute::ClipDistance5: + return "ClipDistance[5]"; + case Attribute::ClipDistance6: + return "ClipDistance[6]"; + case Attribute::ClipDistance7: + return "ClipDistance[7]"; + case Attribute::PointSpriteS: + return "PointSprite.S"; + case Attribute::PointSpriteT: + return "PointSprite.T"; + case Attribute::FogCoordinate: + return "FogCoordinate"; + case Attribute::TessellationEvaluationPointU: + return "TessellationEvaluationPoint.U"; + case Attribute::TessellationEvaluationPointV: + return "TessellationEvaluationPoint.V"; + case Attribute::InstanceId: + return "InstanceId"; + case Attribute::VertexId: + return "VertexId"; + case Attribute::FixedFncTexture0S: + return "FixedFncTexture[0].S"; + case Attribute::FixedFncTexture0T: + return "FixedFncTexture[0].T"; + case Attribute::FixedFncTexture0R: + return "FixedFncTexture[0].R"; + case Attribute::FixedFncTexture0Q: + return "FixedFncTexture[0].Q"; + case Attribute::FixedFncTexture1S: + return "FixedFncTexture[1].S"; + case Attribute::FixedFncTexture1T: + return "FixedFncTexture[1].T"; + case Attribute::FixedFncTexture1R: + return "FixedFncTexture[1].R"; + case Attribute::FixedFncTexture1Q: + return "FixedFncTexture[1].Q"; + case Attribute::FixedFncTexture2S: + return "FixedFncTexture[2].S"; + case Attribute::FixedFncTexture2T: + return "FixedFncTexture[2].T"; + case Attribute::FixedFncTexture2R: + return "FixedFncTexture[2].R"; + case Attribute::FixedFncTexture2Q: + return "FixedFncTexture[2].Q"; + case Attribute::FixedFncTexture3S: + return "FixedFncTexture[3].S"; + case Attribute::FixedFncTexture3T: + return "FixedFncTexture[3].T"; + case Attribute::FixedFncTexture3R: + return "FixedFncTexture[3].R"; + case Attribute::FixedFncTexture3Q: + return "FixedFncTexture[3].Q"; + case Attribute::FixedFncTexture4S: + return "FixedFncTexture[4].S"; + case Attribute::FixedFncTexture4T: + return "FixedFncTexture[4].T"; + case Attribute::FixedFncTexture4R: + return "FixedFncTexture[4].R"; + case Attribute::FixedFncTexture4Q: + return "FixedFncTexture[4].Q"; + case Attribute::FixedFncTexture5S: + return "FixedFncTexture[5].S"; + case Attribute::FixedFncTexture5T: + return "FixedFncTexture[5].T"; + case Attribute::FixedFncTexture5R: + return "FixedFncTexture[5].R"; + case Attribute::FixedFncTexture5Q: + return "FixedFncTexture[5].Q"; + case Attribute::FixedFncTexture6S: + return "FixedFncTexture[6].S"; + case Attribute::FixedFncTexture6T: + return "FixedFncTexture[6].T"; + case Attribute::FixedFncTexture6R: + return "FixedFncTexture[6].R"; + case Attribute::FixedFncTexture6Q: + return "FixedFncTexture[6].Q"; + case Attribute::FixedFncTexture7S: + return "FixedFncTexture[7].S"; + case Attribute::FixedFncTexture7T: + return "FixedFncTexture[7].T"; + case Attribute::FixedFncTexture7R: + return "FixedFncTexture[7].R"; + case Attribute::FixedFncTexture7Q: + return "FixedFncTexture[7].Q"; + case Attribute::FixedFncTexture8S: + return "FixedFncTexture[8].S"; + case Attribute::FixedFncTexture8T: + return "FixedFncTexture[8].T"; + case Attribute::FixedFncTexture8R: + return "FixedFncTexture[8].R"; + case Attribute::FixedFncTexture8Q: + return "FixedFncTexture[8].Q"; + case Attribute::FixedFncTexture9S: + return "FixedFncTexture[9].S"; + case Attribute::FixedFncTexture9T: + return "FixedFncTexture[9].T"; + case Attribute::FixedFncTexture9R: + return 
"FixedFncTexture[9].R"; + case Attribute::FixedFncTexture9Q: + return "FixedFncTexture[9].Q"; + case Attribute::ViewportMask: + return "ViewportMask"; + case Attribute::FrontFace: + return "FrontFace"; + } + return fmt::format("<reserved attribute {}>", static_cast<int>(attribute)); +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h new file mode 100644 index 000000000..ca1199494 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/attribute.h @@ -0,0 +1,250 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <fmt/format.h> + +#include "common/common_types.h" + +namespace Shader::IR { + +enum class Attribute : u64 { + PrimitiveId = 24, + Layer = 25, + ViewportIndex = 26, + PointSize = 27, + PositionX = 28, + PositionY = 29, + PositionZ = 30, + PositionW = 31, + Generic0X = 32, + Generic0Y = 33, + Generic0Z = 34, + Generic0W = 35, + Generic1X = 36, + Generic1Y = 37, + Generic1Z = 38, + Generic1W = 39, + Generic2X = 40, + Generic2Y = 41, + Generic2Z = 42, + Generic2W = 43, + Generic3X = 44, + Generic3Y = 45, + Generic3Z = 46, + Generic3W = 47, + Generic4X = 48, + Generic4Y = 49, + Generic4Z = 50, + Generic4W = 51, + Generic5X = 52, + Generic5Y = 53, + Generic5Z = 54, + Generic5W = 55, + Generic6X = 56, + Generic6Y = 57, + Generic6Z = 58, + Generic6W = 59, + Generic7X = 60, + Generic7Y = 61, + Generic7Z = 62, + Generic7W = 63, + Generic8X = 64, + Generic8Y = 65, + Generic8Z = 66, + Generic8W = 67, + Generic9X = 68, + Generic9Y = 69, + Generic9Z = 70, + Generic9W = 71, + Generic10X = 72, + Generic10Y = 73, + Generic10Z = 74, + Generic10W = 75, + Generic11X = 76, + Generic11Y = 77, + Generic11Z = 78, + Generic11W = 79, + Generic12X = 80, + Generic12Y = 81, + Generic12Z = 82, + Generic12W = 83, + Generic13X = 84, + Generic13Y = 85, + Generic13Z = 86, + Generic13W = 87, + Generic14X = 88, + Generic14Y = 89, + Generic14Z = 90, + Generic14W = 91, + Generic15X = 92, + Generic15Y = 93, + Generic15Z = 94, + Generic15W = 95, + Generic16X = 96, + Generic16Y = 97, + Generic16Z = 98, + Generic16W = 99, + Generic17X = 100, + Generic17Y = 101, + Generic17Z = 102, + Generic17W = 103, + Generic18X = 104, + Generic18Y = 105, + Generic18Z = 106, + Generic18W = 107, + Generic19X = 108, + Generic19Y = 109, + Generic19Z = 110, + Generic19W = 111, + Generic20X = 112, + Generic20Y = 113, + Generic20Z = 114, + Generic20W = 115, + Generic21X = 116, + Generic21Y = 117, + Generic21Z = 118, + Generic21W = 119, + Generic22X = 120, + Generic22Y = 121, + Generic22Z = 122, + Generic22W = 123, + Generic23X = 124, + Generic23Y = 125, + Generic23Z = 126, + Generic23W = 127, + Generic24X = 128, + Generic24Y = 129, + Generic24Z = 130, + Generic24W = 131, + Generic25X = 132, + Generic25Y = 133, + Generic25Z = 134, + Generic25W = 135, + Generic26X = 136, + Generic26Y = 137, + Generic26Z = 138, + Generic26W = 139, + Generic27X = 140, + Generic27Y = 141, + Generic27Z = 142, + Generic27W = 143, + Generic28X = 144, + Generic28Y = 145, + Generic28Z = 146, + Generic28W = 147, + Generic29X = 148, + Generic29Y = 149, + Generic29Z = 150, + Generic29W = 151, + Generic30X = 152, + Generic30Y = 153, + Generic30Z = 154, + Generic30W = 155, + Generic31X = 156, + Generic31Y = 157, + Generic31Z = 158, + Generic31W = 159, + ColorFrontDiffuseR = 160, + ColorFrontDiffuseG = 161, + ColorFrontDiffuseB = 162, + ColorFrontDiffuseA = 163, + ColorFrontSpecularR 
= 164, + ColorFrontSpecularG = 165, + ColorFrontSpecularB = 166, + ColorFrontSpecularA = 167, + ColorBackDiffuseR = 168, + ColorBackDiffuseG = 169, + ColorBackDiffuseB = 170, + ColorBackDiffuseA = 171, + ColorBackSpecularR = 172, + ColorBackSpecularG = 173, + ColorBackSpecularB = 174, + ColorBackSpecularA = 175, + ClipDistance0 = 176, + ClipDistance1 = 177, + ClipDistance2 = 178, + ClipDistance3 = 179, + ClipDistance4 = 180, + ClipDistance5 = 181, + ClipDistance6 = 182, + ClipDistance7 = 183, + PointSpriteS = 184, + PointSpriteT = 185, + FogCoordinate = 186, + TessellationEvaluationPointU = 188, + TessellationEvaluationPointV = 189, + InstanceId = 190, + VertexId = 191, + FixedFncTexture0S = 192, + FixedFncTexture0T = 193, + FixedFncTexture0R = 194, + FixedFncTexture0Q = 195, + FixedFncTexture1S = 196, + FixedFncTexture1T = 197, + FixedFncTexture1R = 198, + FixedFncTexture1Q = 199, + FixedFncTexture2S = 200, + FixedFncTexture2T = 201, + FixedFncTexture2R = 202, + FixedFncTexture2Q = 203, + FixedFncTexture3S = 204, + FixedFncTexture3T = 205, + FixedFncTexture3R = 206, + FixedFncTexture3Q = 207, + FixedFncTexture4S = 208, + FixedFncTexture4T = 209, + FixedFncTexture4R = 210, + FixedFncTexture4Q = 211, + FixedFncTexture5S = 212, + FixedFncTexture5T = 213, + FixedFncTexture5R = 214, + FixedFncTexture5Q = 215, + FixedFncTexture6S = 216, + FixedFncTexture6T = 217, + FixedFncTexture6R = 218, + FixedFncTexture6Q = 219, + FixedFncTexture7S = 220, + FixedFncTexture7T = 221, + FixedFncTexture7R = 222, + FixedFncTexture7Q = 223, + FixedFncTexture8S = 224, + FixedFncTexture8T = 225, + FixedFncTexture8R = 226, + FixedFncTexture8Q = 227, + FixedFncTexture9S = 228, + FixedFncTexture9T = 229, + FixedFncTexture9R = 230, + FixedFncTexture9Q = 231, + ViewportMask = 232, + FrontFace = 255, +}; + +constexpr size_t NUM_GENERICS = 32; + +[[nodiscard]] bool IsGeneric(Attribute attribute) noexcept; + +[[nodiscard]] u32 GenericAttributeIndex(Attribute attribute); + +[[nodiscard]] u32 GenericAttributeElement(Attribute attribute); + +[[nodiscard]] std::string NameOf(Attribute attribute); + +[[nodiscard]] constexpr IR::Attribute operator+(IR::Attribute attribute, size_t value) noexcept { + return static_cast<IR::Attribute>(static_cast<size_t>(attribute) + value); +} + +} // namespace Shader::IR + +template <> +struct fmt::formatter<Shader::IR::Attribute> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::IR::Attribute& attribute, FormatContext& ctx) { + return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(attribute)); + } +}; diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp new file mode 100644 index 000000000..7c08b25ce --- /dev/null +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -0,0 +1,149 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <algorithm> +#include <initializer_list> +#include <map> +#include <memory> + +#include "common/bit_cast.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::IR { + +Block::Block(ObjectPool<Inst>& inst_pool_) : inst_pool{&inst_pool_} {} + +Block::~Block() = default; + +void Block::AppendNewInst(Opcode op, std::initializer_list<Value> args) { + PrependNewInst(end(), op, args); +} + +Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, + std::initializer_list<Value> args, u32 flags) { + Inst* const inst{inst_pool->Create(op, flags)}; + const auto result_it{instructions.insert(insertion_point, *inst)}; + + if (inst->NumArgs() != args.size()) { + throw InvalidArgument("Invalid number of arguments {} in {}", args.size(), op); + } + std::ranges::for_each(args, [inst, index = size_t{0}](const Value& arg) mutable { + inst->SetArg(index, arg); + ++index; + }); + return result_it; +} + +void Block::AddBranch(Block* block) { + if (std::ranges::find(imm_successors, block) != imm_successors.end()) { + throw LogicError("Successor already inserted"); + } + if (std::ranges::find(block->imm_predecessors, this) != block->imm_predecessors.end()) { + throw LogicError("Predecessor already inserted"); + } + imm_successors.push_back(block); + block->imm_predecessors.push_back(this); +} + +static std::string BlockToIndex(const std::map<const Block*, size_t>& block_to_index, + Block* block) { + if (const auto it{block_to_index.find(block)}; it != block_to_index.end()) { + return fmt::format("{{Block ${}}}", it->second); + } + return fmt::format("$<unknown block {:016x}>", reinterpret_cast<u64>(block)); +} + +static size_t InstIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index, + const Inst* inst) { + const auto [it, is_inserted]{inst_to_index.emplace(inst, inst_index + 1)}; + if (is_inserted) { + ++inst_index; + } + return it->second; +} + +static std::string ArgToIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index, + const Value& arg) { + if (arg.IsEmpty()) { + return "<null>"; + } + if (!arg.IsImmediate() || arg.IsIdentity()) { + return fmt::format("%{}", InstIndex(inst_to_index, inst_index, arg.Inst())); + } + switch (arg.Type()) { + case Type::U1: + return fmt::format("#{}", arg.U1() ? 
"true" : "false"); + case Type::U8: + return fmt::format("#{}", arg.U8()); + case Type::U16: + return fmt::format("#{}", arg.U16()); + case Type::U32: + return fmt::format("#{}", arg.U32()); + case Type::U64: + return fmt::format("#{}", arg.U64()); + case Type::F32: + return fmt::format("#{}", arg.F32()); + case Type::Reg: + return fmt::format("{}", arg.Reg()); + case Type::Pred: + return fmt::format("{}", arg.Pred()); + case Type::Attribute: + return fmt::format("{}", arg.Attribute()); + default: + return "<unknown immediate type>"; + } +} + +std::string DumpBlock(const Block& block) { + size_t inst_index{0}; + std::map<const Inst*, size_t> inst_to_index; + return DumpBlock(block, {}, inst_to_index, inst_index); +} + +std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>& block_to_index, + std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index) { + std::string ret{"Block"}; + if (const auto it{block_to_index.find(&block)}; it != block_to_index.end()) { + ret += fmt::format(" ${}", it->second); + } + ret += '\n'; + for (const Inst& inst : block) { + const Opcode op{inst.GetOpcode()}; + ret += fmt::format("[{:016x}] ", reinterpret_cast<u64>(&inst)); + if (TypeOf(op) != Type::Void) { + ret += fmt::format("%{:<5} = {}", InstIndex(inst_to_index, inst_index, &inst), op); + } else { + ret += fmt::format(" {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces + } + const size_t arg_count{inst.NumArgs()}; + for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) { + const Value arg{inst.Arg(arg_index)}; + const std::string arg_str{ArgToIndex(inst_to_index, inst_index, arg)}; + ret += arg_index != 0 ? ", " : " "; + if (op == Opcode::Phi) { + ret += fmt::format("[ {}, {} ]", arg_str, + BlockToIndex(block_to_index, inst.PhiBlock(arg_index))); + } else { + ret += arg_str; + } + if (op != Opcode::Phi) { + const Type actual_type{arg.Type()}; + const Type expected_type{ArgTypeOf(op, arg_index)}; + if (!AreTypesCompatible(actual_type, expected_type)) { + ret += fmt::format("<type error: {} != {}>", actual_type, expected_type); + } + } + } + if (TypeOf(op) != Type::Void) { + ret += fmt::format(" (uses: {})\n", inst.UseCount()); + } else { + ret += '\n'; + } + } + return ret; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h new file mode 100644 index 000000000..7e134b4c7 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -0,0 +1,185 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <initializer_list> +#include <map> +#include <span> +#include <vector> + +#include <boost/intrusive/list.hpp> + +#include "common/bit_cast.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/condition.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::IR { + +class Block { +public: + using InstructionList = boost::intrusive::list<Inst>; + using size_type = InstructionList::size_type; + using iterator = InstructionList::iterator; + using const_iterator = InstructionList::const_iterator; + using reverse_iterator = InstructionList::reverse_iterator; + using const_reverse_iterator = InstructionList::const_reverse_iterator; + + explicit Block(ObjectPool<Inst>& inst_pool_); + ~Block(); + + Block(const Block&) = delete; + Block& operator=(const Block&) = delete; + + Block(Block&&) = default; + Block& operator=(Block&&) = default; + + /// Appends a new instruction to the end of this basic block. + void AppendNewInst(Opcode op, std::initializer_list<Value> args); + + /// Prepends a new instruction to this basic block before the insertion point. + iterator PrependNewInst(iterator insertion_point, Opcode op, + std::initializer_list<Value> args = {}, u32 flags = 0); + + /// Adds a new branch to this basic block. + void AddBranch(Block* block); + + /// Gets a mutable reference to the instruction list for this basic block. + [[nodiscard]] InstructionList& Instructions() noexcept { + return instructions; + } + /// Gets an immutable reference to the instruction list for this basic block. + [[nodiscard]] const InstructionList& Instructions() const noexcept { + return instructions; + } + + /// Gets an immutable span to the immediate predecessors. + [[nodiscard]] std::span<Block* const> ImmPredecessors() const noexcept { + return imm_predecessors; + } + /// Gets an immutable span to the immediate successors. + [[nodiscard]] std::span<Block* const> ImmSuccessors() const noexcept { + return imm_successors; + } + + /// Intrusively store the host definition of this instruction. + template <typename DefinitionType> + void SetDefinition(DefinitionType def) { + definition = Common::BitCast<u32>(def); + } + + /// Return the intrusively stored host definition of this instruction. 
+ template <typename DefinitionType> + [[nodiscard]] DefinitionType Definition() const noexcept { + return Common::BitCast<DefinitionType>(definition); + } + + void SetSsaRegValue(IR::Reg reg, const Value& value) noexcept { + ssa_reg_values[RegIndex(reg)] = value; + } + const Value& SsaRegValue(IR::Reg reg) const noexcept { + return ssa_reg_values[RegIndex(reg)]; + } + + void SsaSeal() noexcept { + is_ssa_sealed = true; + } + [[nodiscard]] bool IsSsaSealed() const noexcept { + return is_ssa_sealed; + } + + [[nodiscard]] bool empty() const { + return instructions.empty(); + } + [[nodiscard]] size_type size() const { + return instructions.size(); + } + + [[nodiscard]] Inst& front() { + return instructions.front(); + } + [[nodiscard]] const Inst& front() const { + return instructions.front(); + } + + [[nodiscard]] Inst& back() { + return instructions.back(); + } + [[nodiscard]] const Inst& back() const { + return instructions.back(); + } + + [[nodiscard]] iterator begin() { + return instructions.begin(); + } + [[nodiscard]] const_iterator begin() const { + return instructions.begin(); + } + [[nodiscard]] iterator end() { + return instructions.end(); + } + [[nodiscard]] const_iterator end() const { + return instructions.end(); + } + + [[nodiscard]] reverse_iterator rbegin() { + return instructions.rbegin(); + } + [[nodiscard]] const_reverse_iterator rbegin() const { + return instructions.rbegin(); + } + [[nodiscard]] reverse_iterator rend() { + return instructions.rend(); + } + [[nodiscard]] const_reverse_iterator rend() const { + return instructions.rend(); + } + + [[nodiscard]] const_iterator cbegin() const { + return instructions.cbegin(); + } + [[nodiscard]] const_iterator cend() const { + return instructions.cend(); + } + + [[nodiscard]] const_reverse_iterator crbegin() const { + return instructions.crbegin(); + } + [[nodiscard]] const_reverse_iterator crend() const { + return instructions.crend(); + } + +private: + /// Memory pool for instruction list + ObjectPool<Inst>* inst_pool; + + /// List of instructions in this block + InstructionList instructions; + + /// Block immediate predecessors + std::vector<Block*> imm_predecessors; + /// Block immediate successors + std::vector<Block*> imm_successors; + + /// Intrusively store the value of a register in the block. + std::array<Value, NUM_REGS> ssa_reg_values; + /// Intrusively store if the block is sealed in the SSA pass. + bool is_ssa_sealed{false}; + + /// Intrusively stored host definition of this block. + u32 definition{}; +}; + +using BlockList = std::vector<Block*>; + +[[nodiscard]] std::string DumpBlock(const Block& block); + +[[nodiscard]] std::string DumpBlock(const Block& block, + const std::map<const Block*, size_t>& block_to_index, + std::map<const Inst*, size_t>& inst_to_index, + size_t& inst_index); + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/breadth_first_search.h b/src/shader_recompiler/frontend/ir/breadth_first_search.h new file mode 100644 index 000000000..a52ccbd58 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/breadth_first_search.h @@ -0,0 +1,56 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <optional> +#include <type_traits> +#include <queue> + +#include <boost/container/small_vector.hpp> + +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::IR { + +template <typename Pred> +auto BreadthFirstSearch(const Value& value, Pred&& pred) + -> std::invoke_result_t<Pred, const Inst*> { + if (value.IsImmediate()) { + // Nothing to do with immediates + return std::nullopt; + } + // Breadth-first search visiting the right most arguments first + // Small vector has been determined from shaders in Super Smash Bros. Ultimate + boost::container::small_vector<const Inst*, 2> visited; + std::queue<const Inst*> queue; + queue.push(value.InstRecursive()); + + while (!queue.empty()) { + // Pop one instruction from the queue + const Inst* const inst{queue.front()}; + queue.pop(); + if (const std::optional result = pred(inst)) { + // This is the instruction we were looking for + return result; + } + // Visit the right most arguments first + for (size_t arg = inst->NumArgs(); arg--;) { + const Value arg_value{inst->Arg(arg)}; + if (arg_value.IsImmediate()) { + continue; + } + // Queue instruction if it hasn't been visited + const Inst* const arg_inst{arg_value.InstRecursive()}; + if (std::ranges::find(visited, arg_inst) == visited.end()) { + visited.push_back(arg_inst); + queue.push(arg_inst); + } + } + } + // SSA tree has been traversed and the result hasn't been found + return std::nullopt; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/condition.cpp b/src/shader_recompiler/frontend/ir/condition.cpp new file mode 100644 index 000000000..fc18ea2a2 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/condition.cpp @@ -0,0 +1,29 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string> + +#include <fmt/format.h> + +#include "shader_recompiler/frontend/ir/condition.h" + +namespace Shader::IR { + +std::string NameOf(Condition condition) { + std::string ret; + if (condition.GetFlowTest() != FlowTest::T) { + ret = fmt::to_string(condition.GetFlowTest()); + } + const auto [pred, negated]{condition.GetPred()}; + if (!ret.empty()) { + ret += '&'; + } + if (negated) { + ret += '!'; + } + ret += fmt::to_string(pred); + return ret; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/condition.h b/src/shader_recompiler/frontend/ir/condition.h new file mode 100644 index 000000000..aa8597c60 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/condition.h @@ -0,0 +1,60 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <compare> +#include <string> + +#include <fmt/format.h> + +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/flow_test.h" +#include "shader_recompiler/frontend/ir/pred.h" + +namespace Shader::IR { + +class Condition { +public: + Condition() noexcept = default; + + explicit Condition(FlowTest flow_test_, Pred pred_, bool pred_negated_ = false) noexcept + : flow_test{static_cast<u16>(flow_test_)}, pred{static_cast<u8>(pred_)}, + pred_negated{pred_negated_ ? 
u8{1} : u8{0}} {} + + explicit Condition(Pred pred_, bool pred_negated_ = false) noexcept + : Condition(FlowTest::T, pred_, pred_negated_) {} + + explicit Condition(bool value) : Condition(Pred::PT, !value) {} + + auto operator<=>(const Condition&) const noexcept = default; + + [[nodiscard]] IR::FlowTest GetFlowTest() const noexcept { + return static_cast<IR::FlowTest>(flow_test); + } + + [[nodiscard]] std::pair<IR::Pred, bool> GetPred() const noexcept { + return {static_cast<IR::Pred>(pred), pred_negated != 0}; + } + +private: + u16 flow_test; + u8 pred; + u8 pred_negated; +}; + +std::string NameOf(Condition condition); + +} // namespace Shader::IR + +template <> +struct fmt::formatter<Shader::IR::Condition> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::IR::Condition& cond, FormatContext& ctx) { + return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(cond)); + } +}; diff --git a/src/shader_recompiler/frontend/ir/flow_test.cpp b/src/shader_recompiler/frontend/ir/flow_test.cpp new file mode 100644 index 000000000..6ebb4ad89 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/flow_test.cpp @@ -0,0 +1,83 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string> + +#include <fmt/format.h> + +#include "shader_recompiler/frontend/ir/flow_test.h" + +namespace Shader::IR { + +std::string NameOf(FlowTest flow_test) { + switch (flow_test) { + case FlowTest::F: + return "F"; + case FlowTest::LT: + return "LT"; + case FlowTest::EQ: + return "EQ"; + case FlowTest::LE: + return "LE"; + case FlowTest::GT: + return "GT"; + case FlowTest::NE: + return "NE"; + case FlowTest::GE: + return "GE"; + case FlowTest::NUM: + return "NUM"; + case FlowTest::NaN: + return "NAN"; + case FlowTest::LTU: + return "LTU"; + case FlowTest::EQU: + return "EQU"; + case FlowTest::LEU: + return "LEU"; + case FlowTest::GTU: + return "GTU"; + case FlowTest::NEU: + return "NEU"; + case FlowTest::GEU: + return "GEU"; + case FlowTest::T: + return "T"; + case FlowTest::OFF: + return "OFF"; + case FlowTest::LO: + return "LO"; + case FlowTest::SFF: + return "SFF"; + case FlowTest::LS: + return "LS"; + case FlowTest::HI: + return "HI"; + case FlowTest::SFT: + return "SFT"; + case FlowTest::HS: + return "HS"; + case FlowTest::OFT: + return "OFT"; + case FlowTest::CSM_TA: + return "CSM_TA"; + case FlowTest::CSM_TR: + return "CSM_TR"; + case FlowTest::CSM_MX: + return "CSM_MX"; + case FlowTest::FCSM_TA: + return "FCSM_TA"; + case FlowTest::FCSM_TR: + return "FCSM_TR"; + case FlowTest::FCSM_MX: + return "FCSM_MX"; + case FlowTest::RLE: + return "RLE"; + case FlowTest::RGT: + return "RGT"; + } + return fmt::format("<invalid flow test {}>", static_cast<int>(flow_test)); +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/flow_test.h b/src/shader_recompiler/frontend/ir/flow_test.h new file mode 100644 index 000000000..09e113773 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/flow_test.h @@ -0,0 +1,62 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <string> +#include <fmt/format.h> + +#include "common/common_types.h" + +namespace Shader::IR { + +enum class FlowTest : u64 { + F, + LT, + EQ, + LE, + GT, + NE, + GE, + NUM, + NaN, + LTU, + EQU, + LEU, + GTU, + NEU, + GEU, + T, + OFF, + LO, + SFF, + LS, + HI, + SFT, + HS, + OFT, + CSM_TA, + CSM_TR, + CSM_MX, + FCSM_TA, + FCSM_TR, + FCSM_MX, + RLE, + RGT, +}; + +[[nodiscard]] std::string NameOf(FlowTest flow_test); + +} // namespace Shader::IR + +template <> +struct fmt::formatter<Shader::IR::FlowTest> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::IR::FlowTest& flow_test, FormatContext& ctx) { + return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(flow_test)); + } +}; diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp new file mode 100644 index 000000000..13159a68d --- /dev/null +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -0,0 +1,2017 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_cast.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::IR { +namespace { +[[noreturn]] void ThrowInvalidType(Type type) { + throw InvalidArgument("Invalid type {}", type); +} + +Value MakeLodClampPair(IREmitter& ir, const F32& bias_lod, const F32& lod_clamp) { + if (!bias_lod.IsEmpty() && !lod_clamp.IsEmpty()) { + return ir.CompositeConstruct(bias_lod, lod_clamp); + } else if (!bias_lod.IsEmpty()) { + return bias_lod; + } else if (!lod_clamp.IsEmpty()) { + return lod_clamp; + } else { + return Value{}; + } +} +} // Anonymous namespace + +U1 IREmitter::Imm1(bool value) const { + return U1{Value{value}}; +} + +U8 IREmitter::Imm8(u8 value) const { + return U8{Value{value}}; +} + +U16 IREmitter::Imm16(u16 value) const { + return U16{Value{value}}; +} + +U32 IREmitter::Imm32(u32 value) const { + return U32{Value{value}}; +} + +U32 IREmitter::Imm32(s32 value) const { + return U32{Value{static_cast<u32>(value)}}; +} + +F32 IREmitter::Imm32(f32 value) const { + return F32{Value{value}}; +} + +U64 IREmitter::Imm64(u64 value) const { + return U64{Value{value}}; +} + +U64 IREmitter::Imm64(s64 value) const { + return U64{Value{static_cast<u64>(value)}}; +} + +F64 IREmitter::Imm64(f64 value) const { + return F64{Value{value}}; +} + +U1 IREmitter::ConditionRef(const U1& value) { + return Inst<U1>(Opcode::ConditionRef, value); +} + +void IREmitter::Reference(const Value& value) { + Inst(Opcode::Reference, value); +} + +void IREmitter::PhiMove(IR::Inst& phi, const Value& value) { + Inst(Opcode::PhiMove, Value{&phi}, value); +} + +void IREmitter::Prologue() { + Inst(Opcode::Prologue); +} + +void IREmitter::Epilogue() { + Inst(Opcode::Epilogue); +} + +void IREmitter::DemoteToHelperInvocation() { + Inst(Opcode::DemoteToHelperInvocation); +} + +void IREmitter::EmitVertex(const U32& stream) { + Inst(Opcode::EmitVertex, stream); +} + +void IREmitter::EndPrimitive(const U32& stream) { + Inst(Opcode::EndPrimitive, stream); +} + +void IREmitter::Barrier() { + Inst(Opcode::Barrier); +} + +void IREmitter::WorkgroupMemoryBarrier() { + Inst(Opcode::WorkgroupMemoryBarrier); +} + +void IREmitter::DeviceMemoryBarrier() { + Inst(Opcode::DeviceMemoryBarrier); +} + +U32 IREmitter::GetReg(IR::Reg reg) { + return Inst<U32>(Opcode::GetRegister, reg); +} + +void 
IREmitter::SetReg(IR::Reg reg, const U32& value) { + Inst(Opcode::SetRegister, reg, value); +} + +U1 IREmitter::GetPred(IR::Pred pred, bool is_negated) { + if (pred == Pred::PT) { + return Imm1(!is_negated); + } + const U1 value{Inst<U1>(Opcode::GetPred, pred)}; + if (is_negated) { + return Inst<U1>(Opcode::LogicalNot, value); + } else { + return value; + } +} + +void IREmitter::SetPred(IR::Pred pred, const U1& value) { + if (pred != IR::Pred::PT) { + Inst(Opcode::SetPred, pred, value); + } +} + +U1 IREmitter::GetGotoVariable(u32 id) { + return Inst<U1>(Opcode::GetGotoVariable, id); +} + +void IREmitter::SetGotoVariable(u32 id, const U1& value) { + Inst(Opcode::SetGotoVariable, id, value); +} + +U32 IREmitter::GetIndirectBranchVariable() { + return Inst<U32>(Opcode::GetIndirectBranchVariable); +} + +void IREmitter::SetIndirectBranchVariable(const U32& value) { + Inst(Opcode::SetIndirectBranchVariable, value); +} + +U32 IREmitter::GetCbuf(const U32& binding, const U32& byte_offset) { + return Inst<U32>(Opcode::GetCbufU32, binding, byte_offset); +} + +Value IREmitter::GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize, + bool is_signed) { + switch (bitsize) { + case 8: + return Inst<U32>(is_signed ? Opcode::GetCbufS8 : Opcode::GetCbufU8, binding, byte_offset); + case 16: + return Inst<U32>(is_signed ? Opcode::GetCbufS16 : Opcode::GetCbufU16, binding, byte_offset); + case 32: + return Inst<U32>(Opcode::GetCbufU32, binding, byte_offset); + case 64: + return Inst(Opcode::GetCbufU32x2, binding, byte_offset); + default: + throw InvalidArgument("Invalid bit size {}", bitsize); + } +} + +F32 IREmitter::GetFloatCbuf(const U32& binding, const U32& byte_offset) { + return Inst<F32>(Opcode::GetCbufF32, binding, byte_offset); +} + +U1 IREmitter::GetZFlag() { + return Inst<U1>(Opcode::GetZFlag); +} + +U1 IREmitter::GetSFlag() { + return Inst<U1>(Opcode::GetSFlag); +} + +U1 IREmitter::GetCFlag() { + return Inst<U1>(Opcode::GetCFlag); +} + +U1 IREmitter::GetOFlag() { + return Inst<U1>(Opcode::GetOFlag); +} + +void IREmitter::SetZFlag(const U1& value) { + Inst(Opcode::SetZFlag, value); +} + +void IREmitter::SetSFlag(const U1& value) { + Inst(Opcode::SetSFlag, value); +} + +void IREmitter::SetCFlag(const U1& value) { + Inst(Opcode::SetCFlag, value); +} + +void IREmitter::SetOFlag(const U1& value) { + Inst(Opcode::SetOFlag, value); +} + +static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) { + switch (flow_test) { + case FlowTest::F: + return ir.Imm1(false); + case FlowTest::LT: + return ir.LogicalXor(ir.LogicalAnd(ir.GetSFlag(), ir.LogicalNot(ir.GetZFlag())), + ir.GetOFlag()); + case FlowTest::EQ: + return ir.LogicalAnd(ir.LogicalNot(ir.GetSFlag()), ir.GetZFlag()); + case FlowTest::LE: + return ir.LogicalXor(ir.GetSFlag(), ir.LogicalOr(ir.GetZFlag(), ir.GetOFlag())); + case FlowTest::GT: + return ir.LogicalAnd(ir.LogicalXor(ir.LogicalNot(ir.GetSFlag()), ir.GetOFlag()), + ir.LogicalNot(ir.GetZFlag())); + case FlowTest::NE: + return ir.LogicalNot(ir.GetZFlag()); + case FlowTest::GE: + return ir.LogicalNot(ir.LogicalXor(ir.GetSFlag(), ir.GetOFlag())); + case FlowTest::NUM: + return ir.LogicalOr(ir.LogicalNot(ir.GetSFlag()), ir.LogicalNot(ir.GetZFlag())); + case FlowTest::NaN: + return ir.LogicalAnd(ir.GetSFlag(), ir.GetZFlag()); + case FlowTest::LTU: + return ir.LogicalXor(ir.GetSFlag(), ir.GetOFlag()); + case FlowTest::EQU: + return ir.GetZFlag(); + case FlowTest::LEU: + return ir.LogicalOr(ir.LogicalXor(ir.GetSFlag(), ir.GetOFlag()), ir.GetZFlag()); + case FlowTest::GTU: + return 
ir.LogicalXor(ir.LogicalNot(ir.GetSFlag()), + ir.LogicalOr(ir.GetZFlag(), ir.GetOFlag())); + case FlowTest::NEU: + return ir.LogicalOr(ir.GetSFlag(), ir.LogicalNot(ir.GetZFlag())); + case FlowTest::GEU: + return ir.LogicalXor(ir.LogicalOr(ir.LogicalNot(ir.GetSFlag()), ir.GetZFlag()), + ir.GetOFlag()); + case FlowTest::T: + return ir.Imm1(true); + case FlowTest::OFF: + return ir.LogicalNot(ir.GetOFlag()); + case FlowTest::LO: + return ir.LogicalNot(ir.GetCFlag()); + case FlowTest::SFF: + return ir.LogicalNot(ir.GetSFlag()); + case FlowTest::LS: + return ir.LogicalOr(ir.GetZFlag(), ir.LogicalNot(ir.GetCFlag())); + case FlowTest::HI: + return ir.LogicalAnd(ir.GetCFlag(), ir.LogicalNot(ir.GetZFlag())); + case FlowTest::SFT: + return ir.GetSFlag(); + case FlowTest::HS: + return ir.GetCFlag(); + case FlowTest::OFT: + return ir.GetOFlag(); + case FlowTest::RLE: + return ir.LogicalOr(ir.GetSFlag(), ir.GetZFlag()); + case FlowTest::RGT: + return ir.LogicalAnd(ir.LogicalNot(ir.GetSFlag()), ir.LogicalNot(ir.GetZFlag())); + case FlowTest::FCSM_TR: + LOG_WARNING(Shader, "(STUBBED) FCSM_TR"); + return ir.Imm1(false); + case FlowTest::CSM_TA: + case FlowTest::CSM_TR: + case FlowTest::CSM_MX: + case FlowTest::FCSM_TA: + case FlowTest::FCSM_MX: + default: + throw NotImplementedException("Flow test {}", flow_test); + } +} + +U1 IREmitter::Condition(IR::Condition cond) { + const FlowTest flow_test{cond.GetFlowTest()}; + const auto [pred, is_negated]{cond.GetPred()}; + if (flow_test == FlowTest::T) { + return GetPred(pred, is_negated); + } + return LogicalAnd(GetPred(pred, is_negated), GetFlowTest(*this, flow_test)); +} + +U1 IREmitter::GetFlowTestResult(FlowTest test) { + return GetFlowTest(*this, test); +} + +F32 IREmitter::GetAttribute(IR::Attribute attribute) { + return GetAttribute(attribute, Imm32(0)); +} + +F32 IREmitter::GetAttribute(IR::Attribute attribute, const U32& vertex) { + return Inst<F32>(Opcode::GetAttribute, attribute, vertex); +} + +void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) { + Inst(Opcode::SetAttribute, attribute, value, vertex); +} + +F32 IREmitter::GetAttributeIndexed(const U32& phys_address) { + return GetAttributeIndexed(phys_address, Imm32(0)); +} + +F32 IREmitter::GetAttributeIndexed(const U32& phys_address, const U32& vertex) { + return Inst<F32>(Opcode::GetAttributeIndexed, phys_address, vertex); +} + +void IREmitter::SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex) { + Inst(Opcode::SetAttributeIndexed, phys_address, value, vertex); +} + +F32 IREmitter::GetPatch(Patch patch) { + return Inst<F32>(Opcode::GetPatch, patch); +} + +void IREmitter::SetPatch(Patch patch, const F32& value) { + Inst(Opcode::SetPatch, patch, value); +} + +void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) { + Inst(Opcode::SetFragColor, Imm32(index), Imm32(component), value); +} + +void IREmitter::SetSampleMask(const U32& value) { + Inst(Opcode::SetSampleMask, value); +} + +void IREmitter::SetFragDepth(const F32& value) { + Inst(Opcode::SetFragDepth, value); +} + +U32 IREmitter::WorkgroupIdX() { + return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 0)}; +} + +U32 IREmitter::WorkgroupIdY() { + return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 1)}; +} + +U32 IREmitter::WorkgroupIdZ() { + return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 2)}; +} + +Value IREmitter::LocalInvocationId() { + return Inst(Opcode::LocalInvocationId); +} + +U32 IREmitter::LocalInvocationIdX() { + return 
U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 0)}; +} + +U32 IREmitter::LocalInvocationIdY() { + return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 1)}; +} + +U32 IREmitter::LocalInvocationIdZ() { + return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 2)}; +} + +U32 IREmitter::InvocationId() { + return Inst<U32>(Opcode::InvocationId); +} + +U32 IREmitter::SampleId() { + return Inst<U32>(Opcode::SampleId); +} + +U1 IREmitter::IsHelperInvocation() { + return Inst<U1>(Opcode::IsHelperInvocation); +} + +F32 IREmitter::YDirection() { + return Inst<F32>(Opcode::YDirection); +} + +U32 IREmitter::LaneId() { + return Inst<U32>(Opcode::LaneId); +} + +U32 IREmitter::LoadGlobalU8(const U64& address) { + return Inst<U32>(Opcode::LoadGlobalU8, address); +} + +U32 IREmitter::LoadGlobalS8(const U64& address) { + return Inst<U32>(Opcode::LoadGlobalS8, address); +} + +U32 IREmitter::LoadGlobalU16(const U64& address) { + return Inst<U32>(Opcode::LoadGlobalU16, address); +} + +U32 IREmitter::LoadGlobalS16(const U64& address) { + return Inst<U32>(Opcode::LoadGlobalS16, address); +} + +U32 IREmitter::LoadGlobal32(const U64& address) { + return Inst<U32>(Opcode::LoadGlobal32, address); +} + +Value IREmitter::LoadGlobal64(const U64& address) { + return Inst<Value>(Opcode::LoadGlobal64, address); +} + +Value IREmitter::LoadGlobal128(const U64& address) { + return Inst<Value>(Opcode::LoadGlobal128, address); +} + +void IREmitter::WriteGlobalU8(const U64& address, const U32& value) { + Inst(Opcode::WriteGlobalU8, address, value); +} + +void IREmitter::WriteGlobalS8(const U64& address, const U32& value) { + Inst(Opcode::WriteGlobalS8, address, value); +} + +void IREmitter::WriteGlobalU16(const U64& address, const U32& value) { + Inst(Opcode::WriteGlobalU16, address, value); +} + +void IREmitter::WriteGlobalS16(const U64& address, const U32& value) { + Inst(Opcode::WriteGlobalS16, address, value); +} + +void IREmitter::WriteGlobal32(const U64& address, const U32& value) { + Inst(Opcode::WriteGlobal32, address, value); +} + +void IREmitter::WriteGlobal64(const U64& address, const IR::Value& vector) { + Inst(Opcode::WriteGlobal64, address, vector); +} + +void IREmitter::WriteGlobal128(const U64& address, const IR::Value& vector) { + Inst(Opcode::WriteGlobal128, address, vector); +} + +U32 IREmitter::LoadLocal(const IR::U32& word_offset) { + return Inst<U32>(Opcode::LoadLocal, word_offset); +} + +void IREmitter::WriteLocal(const IR::U32& word_offset, const IR::U32& value) { + Inst(Opcode::WriteLocal, word_offset, value); +} + +Value IREmitter::LoadShared(int bit_size, bool is_signed, const IR::U32& offset) { + switch (bit_size) { + case 8: + return Inst(is_signed ? Opcode::LoadSharedS8 : Opcode::LoadSharedU8, offset); + case 16: + return Inst(is_signed ? 
Opcode::LoadSharedS16 : Opcode::LoadSharedU16, offset); + case 32: + return Inst(Opcode::LoadSharedU32, offset); + case 64: + return Inst(Opcode::LoadSharedU64, offset); + case 128: + return Inst(Opcode::LoadSharedU128, offset); + } + throw InvalidArgument("Invalid bit size {}", bit_size); +} + +void IREmitter::WriteShared(int bit_size, const IR::U32& offset, const IR::Value& value) { + switch (bit_size) { + case 8: + Inst(Opcode::WriteSharedU8, offset, value); + break; + case 16: + Inst(Opcode::WriteSharedU16, offset, value); + break; + case 32: + Inst(Opcode::WriteSharedU32, offset, value); + break; + case 64: + Inst(Opcode::WriteSharedU64, offset, value); + break; + case 128: + Inst(Opcode::WriteSharedU128, offset, value); + break; + default: + throw InvalidArgument("Invalid bit size {}", bit_size); + } +} + +U1 IREmitter::GetZeroFromOp(const Value& op) { + return Inst<U1>(Opcode::GetZeroFromOp, op); +} + +U1 IREmitter::GetSignFromOp(const Value& op) { + return Inst<U1>(Opcode::GetSignFromOp, op); +} + +U1 IREmitter::GetCarryFromOp(const Value& op) { + return Inst<U1>(Opcode::GetCarryFromOp, op); +} + +U1 IREmitter::GetOverflowFromOp(const Value& op) { + return Inst<U1>(Opcode::GetOverflowFromOp, op); +} + +U1 IREmitter::GetSparseFromOp(const Value& op) { + return Inst<U1>(Opcode::GetSparseFromOp, op); +} + +U1 IREmitter::GetInBoundsFromOp(const Value& op) { + return Inst<U1>(Opcode::GetInBoundsFromOp, op); +} + +F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) { + if (a.Type() != b.Type()) { + throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); + } + switch (a.Type()) { + case Type::F16: + return Inst<F16>(Opcode::FPAdd16, Flags{control}, a, b); + case Type::F32: + return Inst<F32>(Opcode::FPAdd32, Flags{control}, a, b); + case Type::F64: + return Inst<F64>(Opcode::FPAdd64, Flags{control}, a, b); + default: + ThrowInvalidType(a.Type()); + } +} + +Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2) { + if (e1.Type() != e2.Type()) { + throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type()); + } + switch (e1.Type()) { + case Type::U32: + return Inst(Opcode::CompositeConstructU32x2, e1, e2); + case Type::F16: + return Inst(Opcode::CompositeConstructF16x2, e1, e2); + case Type::F32: + return Inst(Opcode::CompositeConstructF32x2, e1, e2); + case Type::F64: + return Inst(Opcode::CompositeConstructF64x2, e1, e2); + default: + ThrowInvalidType(e1.Type()); + } +} + +Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Value& e3) { + if (e1.Type() != e2.Type() || e1.Type() != e3.Type()) { + throw InvalidArgument("Mismatching types {}, {}, and {}", e1.Type(), e2.Type(), e3.Type()); + } + switch (e1.Type()) { + case Type::U32: + return Inst(Opcode::CompositeConstructU32x3, e1, e2, e3); + case Type::F16: + return Inst(Opcode::CompositeConstructF16x3, e1, e2, e3); + case Type::F32: + return Inst(Opcode::CompositeConstructF32x3, e1, e2, e3); + case Type::F64: + return Inst(Opcode::CompositeConstructF64x3, e1, e2, e3); + default: + ThrowInvalidType(e1.Type()); + } +} + +Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Value& e3, + const Value& e4) { + if (e1.Type() != e2.Type() || e1.Type() != e3.Type() || e1.Type() != e4.Type()) { + throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(), + e3.Type(), e4.Type()); + } + switch (e1.Type()) { + case Type::U32: + return Inst(Opcode::CompositeConstructU32x4, e1, e2, e3, e4); + case 
Type::F16: + return Inst(Opcode::CompositeConstructF16x4, e1, e2, e3, e4); + case Type::F32: + return Inst(Opcode::CompositeConstructF32x4, e1, e2, e3, e4); + case Type::F64: + return Inst(Opcode::CompositeConstructF64x4, e1, e2, e3, e4); + default: + ThrowInvalidType(e1.Type()); + } +} + +Value IREmitter::CompositeExtract(const Value& vector, size_t element) { + const auto read{[&](Opcode opcode, size_t limit) -> Value { + if (element >= limit) { + throw InvalidArgument("Out of bounds element {}", element); + } + return Inst(opcode, vector, Value{static_cast<u32>(element)}); + }}; + switch (vector.Type()) { + case Type::U32x2: + return read(Opcode::CompositeExtractU32x2, 2); + case Type::U32x3: + return read(Opcode::CompositeExtractU32x3, 3); + case Type::U32x4: + return read(Opcode::CompositeExtractU32x4, 4); + case Type::F16x2: + return read(Opcode::CompositeExtractF16x2, 2); + case Type::F16x3: + return read(Opcode::CompositeExtractF16x3, 3); + case Type::F16x4: + return read(Opcode::CompositeExtractF16x4, 4); + case Type::F32x2: + return read(Opcode::CompositeExtractF32x2, 2); + case Type::F32x3: + return read(Opcode::CompositeExtractF32x3, 3); + case Type::F32x4: + return read(Opcode::CompositeExtractF32x4, 4); + case Type::F64x2: + return read(Opcode::CompositeExtractF64x2, 2); + case Type::F64x3: + return read(Opcode::CompositeExtractF64x3, 3); + case Type::F64x4: + return read(Opcode::CompositeExtractF64x4, 4); + default: + ThrowInvalidType(vector.Type()); + } +} + +Value IREmitter::CompositeInsert(const Value& vector, const Value& object, size_t element) { + const auto insert{[&](Opcode opcode, size_t limit) { + if (element >= limit) { + throw InvalidArgument("Out of bounds element {}", element); + } + return Inst(opcode, vector, object, Value{static_cast<u32>(element)}); + }}; + switch (vector.Type()) { + case Type::U32x2: + return insert(Opcode::CompositeInsertU32x2, 2); + case Type::U32x3: + return insert(Opcode::CompositeInsertU32x3, 3); + case Type::U32x4: + return insert(Opcode::CompositeInsertU32x4, 4); + case Type::F16x2: + return insert(Opcode::CompositeInsertF16x2, 2); + case Type::F16x3: + return insert(Opcode::CompositeInsertF16x3, 3); + case Type::F16x4: + return insert(Opcode::CompositeInsertF16x4, 4); + case Type::F32x2: + return insert(Opcode::CompositeInsertF32x2, 2); + case Type::F32x3: + return insert(Opcode::CompositeInsertF32x3, 3); + case Type::F32x4: + return insert(Opcode::CompositeInsertF32x4, 4); + case Type::F64x2: + return insert(Opcode::CompositeInsertF64x2, 2); + case Type::F64x3: + return insert(Opcode::CompositeInsertF64x3, 3); + case Type::F64x4: + return insert(Opcode::CompositeInsertF64x4, 4); + default: + ThrowInvalidType(vector.Type()); + } +} + +Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) { + if (true_value.Type() != false_value.Type()) { + throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type()); + } + switch (true_value.Type()) { + case Type::U1: + return Inst(Opcode::SelectU1, condition, true_value, false_value); + case Type::U8: + return Inst(Opcode::SelectU8, condition, true_value, false_value); + case Type::U16: + return Inst(Opcode::SelectU16, condition, true_value, false_value); + case Type::U32: + return Inst(Opcode::SelectU32, condition, true_value, false_value); + case Type::U64: + return Inst(Opcode::SelectU64, condition, true_value, false_value); + case Type::F32: + return Inst(Opcode::SelectF32, condition, true_value, false_value); + case 
Type::F64: + return Inst(Opcode::SelectF64, condition, true_value, false_value); + default: + throw InvalidArgument("Invalid type {}", true_value.Type()); + } +} + +template <> +IR::U32 IREmitter::BitCast<IR::U32, IR::F32>(const IR::F32& value) { + return Inst<IR::U32>(Opcode::BitCastU32F32, value); +} + +template <> +IR::F32 IREmitter::BitCast<IR::F32, IR::U32>(const IR::U32& value) { + return Inst<IR::F32>(Opcode::BitCastF32U32, value); +} + +template <> +IR::U16 IREmitter::BitCast<IR::U16, IR::F16>(const IR::F16& value) { + return Inst<IR::U16>(Opcode::BitCastU16F16, value); +} + +template <> +IR::F16 IREmitter::BitCast<IR::F16, IR::U16>(const IR::U16& value) { + return Inst<IR::F16>(Opcode::BitCastF16U16, value); +} + +template <> +IR::U64 IREmitter::BitCast<IR::U64, IR::F64>(const IR::F64& value) { + return Inst<IR::U64>(Opcode::BitCastU64F64, value); +} + +template <> +IR::F64 IREmitter::BitCast<IR::F64, IR::U64>(const IR::U64& value) { + return Inst<IR::F64>(Opcode::BitCastF64U64, value); +} + +U64 IREmitter::PackUint2x32(const Value& vector) { + return Inst<U64>(Opcode::PackUint2x32, vector); +} + +Value IREmitter::UnpackUint2x32(const U64& value) { + return Inst<Value>(Opcode::UnpackUint2x32, value); +} + +U32 IREmitter::PackFloat2x16(const Value& vector) { + return Inst<U32>(Opcode::PackFloat2x16, vector); +} + +Value IREmitter::UnpackFloat2x16(const U32& value) { + return Inst(Opcode::UnpackFloat2x16, value); +} + +U32 IREmitter::PackHalf2x16(const Value& vector) { + return Inst<U32>(Opcode::PackHalf2x16, vector); +} + +Value IREmitter::UnpackHalf2x16(const U32& value) { + return Inst(Opcode::UnpackHalf2x16, value); +} + +F64 IREmitter::PackDouble2x32(const Value& vector) { + return Inst<F64>(Opcode::PackDouble2x32, vector); +} + +Value IREmitter::UnpackDouble2x32(const F64& value) { + return Inst<Value>(Opcode::UnpackDouble2x32, value); +} + +F16F32F64 IREmitter::FPMul(const F16F32F64& a, const F16F32F64& b, FpControl control) { + if (a.Type() != b.Type()) { + throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); + } + switch (a.Type()) { + case Type::F16: + return Inst<F16>(Opcode::FPMul16, Flags{control}, a, b); + case Type::F32: + return Inst<F32>(Opcode::FPMul32, Flags{control}, a, b); + case Type::F64: + return Inst<F64>(Opcode::FPMul64, Flags{control}, a, b); + default: + ThrowInvalidType(a.Type()); + } +} + +F16F32F64 IREmitter::FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F64& c, + FpControl control) { + if (a.Type() != b.Type() || a.Type() != c.Type()) { + throw InvalidArgument("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type()); + } + switch (a.Type()) { + case Type::F16: + return Inst<F16>(Opcode::FPFma16, Flags{control}, a, b, c); + case Type::F32: + return Inst<F32>(Opcode::FPFma32, Flags{control}, a, b, c); + case Type::F64: + return Inst<F64>(Opcode::FPFma64, Flags{control}, a, b, c); + default: + ThrowInvalidType(a.Type()); + } +} + +F16F32F64 IREmitter::FPAbs(const F16F32F64& value) { + switch (value.Type()) { + case Type::F16: + return Inst<F16>(Opcode::FPAbs16, value); + case Type::F32: + return Inst<F32>(Opcode::FPAbs32, value); + case Type::F64: + return Inst<F64>(Opcode::FPAbs64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +F16F32F64 IREmitter::FPNeg(const F16F32F64& value) { + switch (value.Type()) { + case Type::F16: + return Inst<F16>(Opcode::FPNeg16, value); + case Type::F32: + return Inst<F32>(Opcode::FPNeg32, value); + case Type::F64: + return Inst<F64>(Opcode::FPNeg64, value); + 
default: + ThrowInvalidType(value.Type()); + } +} + +F16F32F64 IREmitter::FPAbsNeg(const F16F32F64& value, bool abs, bool neg) { + F16F32F64 result{value}; + if (abs) { + result = FPAbs(result); + } + if (neg) { + result = FPNeg(result); + } + return result; +} + +F32 IREmitter::FPCos(const F32& value) { + return Inst<F32>(Opcode::FPCos, value); +} + +F32 IREmitter::FPSin(const F32& value) { + return Inst<F32>(Opcode::FPSin, value); +} + +F32 IREmitter::FPExp2(const F32& value) { + return Inst<F32>(Opcode::FPExp2, value); +} + +F32 IREmitter::FPLog2(const F32& value) { + return Inst<F32>(Opcode::FPLog2, value); +} + +F32F64 IREmitter::FPRecip(const F32F64& value) { + switch (value.Type()) { + case Type::F32: + return Inst<F32>(Opcode::FPRecip32, value); + case Type::F64: + return Inst<F64>(Opcode::FPRecip64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +F32F64 IREmitter::FPRecipSqrt(const F32F64& value) { + switch (value.Type()) { + case Type::F32: + return Inst<F32>(Opcode::FPRecipSqrt32, value); + case Type::F64: + return Inst<F64>(Opcode::FPRecipSqrt64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +F32 IREmitter::FPSqrt(const F32& value) { + return Inst<F32>(Opcode::FPSqrt, value); +} + +F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) { + switch (value.Type()) { + case Type::F16: + return Inst<F16>(Opcode::FPSaturate16, value); + case Type::F32: + return Inst<F32>(Opcode::FPSaturate32, value); + case Type::F64: + return Inst<F64>(Opcode::FPSaturate64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +F16F32F64 IREmitter::FPClamp(const F16F32F64& value, const F16F32F64& min_value, + const F16F32F64& max_value) { + if (value.Type() != min_value.Type() || value.Type() != max_value.Type()) { + throw InvalidArgument("Mismatching types {}, {}, and {}", value.Type(), min_value.Type(), + max_value.Type()); + } + switch (value.Type()) { + case Type::F16: + return Inst<F16>(Opcode::FPClamp16, value, min_value, max_value); + case Type::F32: + return Inst<F32>(Opcode::FPClamp32, value, min_value, max_value); + case Type::F64: + return Inst<F64>(Opcode::FPClamp64, value, min_value, max_value); + default: + ThrowInvalidType(value.Type()); + } +} + +F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value, FpControl control) { + switch (value.Type()) { + case Type::F16: + return Inst<F16>(Opcode::FPRoundEven16, Flags{control}, value); + case Type::F32: + return Inst<F32>(Opcode::FPRoundEven32, Flags{control}, value); + case Type::F64: + return Inst<F64>(Opcode::FPRoundEven64, Flags{control}, value); + default: + ThrowInvalidType(value.Type()); + } +} + +F16F32F64 IREmitter::FPFloor(const F16F32F64& value, FpControl control) { + switch (value.Type()) { + case Type::F16: + return Inst<F16>(Opcode::FPFloor16, Flags{control}, value); + case Type::F32: + return Inst<F32>(Opcode::FPFloor32, Flags{control}, value); + case Type::F64: + return Inst<F64>(Opcode::FPFloor64, Flags{control}, value); + default: + ThrowInvalidType(value.Type()); + } +} + +F16F32F64 IREmitter::FPCeil(const F16F32F64& value, FpControl control) { + switch (value.Type()) { + case Type::F16: + return Inst<F16>(Opcode::FPCeil16, Flags{control}, value); + case Type::F32: + return Inst<F32>(Opcode::FPCeil32, Flags{control}, value); + case Type::F64: + return Inst<F64>(Opcode::FPCeil64, Flags{control}, value); + default: + ThrowInvalidType(value.Type()); + } +} + +F16F32F64 IREmitter::FPTrunc(const F16F32F64& value, FpControl control) { + switch (value.Type()) { + case Type::F16: + return 
Inst<F16>(Opcode::FPTrunc16, Flags{control}, value); + case Type::F32: + return Inst<F32>(Opcode::FPTrunc32, Flags{control}, value); + case Type::F64: + return Inst<F64>(Opcode::FPTrunc64, Flags{control}, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U1 IREmitter::FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F16: + return Inst<U1>(ordered ? Opcode::FPOrdEqual16 : Opcode::FPUnordEqual16, Flags{control}, + lhs, rhs); + case Type::F32: + return Inst<U1>(ordered ? Opcode::FPOrdEqual32 : Opcode::FPUnordEqual32, Flags{control}, + lhs, rhs); + case Type::F64: + return Inst<U1>(ordered ? Opcode::FPOrdEqual64 : Opcode::FPUnordEqual64, Flags{control}, + lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, + bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F16: + return Inst<U1>(ordered ? Opcode::FPOrdNotEqual16 : Opcode::FPUnordNotEqual16, + Flags{control}, lhs, rhs); + case Type::F32: + return Inst<U1>(ordered ? Opcode::FPOrdNotEqual32 : Opcode::FPUnordNotEqual32, + Flags{control}, lhs, rhs); + case Type::F64: + return Inst<U1>(ordered ? Opcode::FPOrdNotEqual64 : Opcode::FPUnordNotEqual64, + Flags{control}, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, + bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F16: + return Inst<U1>(ordered ? Opcode::FPOrdLessThan16 : Opcode::FPUnordLessThan16, + Flags{control}, lhs, rhs); + case Type::F32: + return Inst<U1>(ordered ? Opcode::FPOrdLessThan32 : Opcode::FPUnordLessThan32, + Flags{control}, lhs, rhs); + case Type::F64: + return Inst<U1>(ordered ? Opcode::FPOrdLessThan64 : Opcode::FPUnordLessThan64, + Flags{control}, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, + bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F16: + return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan16 : Opcode::FPUnordGreaterThan16, + Flags{control}, lhs, rhs); + case Type::F32: + return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan32 : Opcode::FPUnordGreaterThan32, + Flags{control}, lhs, rhs); + case Type::F64: + return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan64 : Opcode::FPUnordGreaterThan64, + Flags{control}, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, + bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F16: + return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual16 : Opcode::FPUnordLessThanEqual16, + Flags{control}, lhs, rhs); + case Type::F32: + return Inst<U1>(ordered ? 
Opcode::FPOrdLessThanEqual32 : Opcode::FPUnordLessThanEqual32, + Flags{control}, lhs, rhs); + case Type::F64: + return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual64 : Opcode::FPUnordLessThanEqual64, + Flags{control}, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, + bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F16: + return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual16 + : Opcode::FPUnordGreaterThanEqual16, + Flags{control}, lhs, rhs); + case Type::F32: + return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual32 + : Opcode::FPUnordGreaterThanEqual32, + Flags{control}, lhs, rhs); + case Type::F64: + return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual64 + : Opcode::FPUnordGreaterThanEqual64, + Flags{control}, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::FPIsNan(const F16F32F64& value) { + switch (value.Type()) { + case Type::F16: + return Inst<U1>(Opcode::FPIsNan16, value); + case Type::F32: + return Inst<U1>(Opcode::FPIsNan32, value); + case Type::F64: + return Inst<U1>(Opcode::FPIsNan64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U1 IREmitter::FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + return LogicalAnd(LogicalNot(FPIsNan(lhs)), LogicalNot(FPIsNan(rhs))); +} + +U1 IREmitter::FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + return LogicalOr(FPIsNan(lhs), FPIsNan(rhs)); +} + +F32F64 IREmitter::FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F32: + return Inst<F32>(Opcode::FPMax32, Flags{control}, lhs, rhs); + case Type::F64: + return Inst<F64>(Opcode::FPMax64, Flags{control}, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +F32F64 IREmitter::FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F32: + return Inst<F32>(Opcode::FPMin32, Flags{control}, lhs, rhs); + case Type::F64: + return Inst<F64>(Opcode::FPMin64, Flags{control}, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) { + if (a.Type() != b.Type()) { + throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); + } + switch (a.Type()) { + case Type::U32: + return Inst<U32>(Opcode::IAdd32, a, b); + case Type::U64: + return Inst<U64>(Opcode::IAdd64, a, b); + default: + ThrowInvalidType(a.Type()); + } +} + +U32U64 IREmitter::ISub(const U32U64& a, const U32U64& b) { + if (a.Type() != b.Type()) { + throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); + } + switch (a.Type()) { + case Type::U32: + return Inst<U32>(Opcode::ISub32, a, b); + case Type::U64: + return Inst<U64>(Opcode::ISub64, a, b); + default: + ThrowInvalidType(a.Type()); + } +} + +U32 IREmitter::IMul(const U32& a, const U32& b) { + return Inst<U32>(Opcode::IMul32, a, b); 
+} + +U32U64 IREmitter::INeg(const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst<U32>(Opcode::INeg32, value); + case Type::U64: + return Inst<U64>(Opcode::INeg64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32 IREmitter::IAbs(const U32& value) { + return Inst<U32>(Opcode::IAbs32, value); +} + +U32U64 IREmitter::ShiftLeftLogical(const U32U64& base, const U32& shift) { + switch (base.Type()) { + case Type::U32: + return Inst<U32>(Opcode::ShiftLeftLogical32, base, shift); + case Type::U64: + return Inst<U64>(Opcode::ShiftLeftLogical64, base, shift); + default: + ThrowInvalidType(base.Type()); + } +} + +U32U64 IREmitter::ShiftRightLogical(const U32U64& base, const U32& shift) { + switch (base.Type()) { + case Type::U32: + return Inst<U32>(Opcode::ShiftRightLogical32, base, shift); + case Type::U64: + return Inst<U64>(Opcode::ShiftRightLogical64, base, shift); + default: + ThrowInvalidType(base.Type()); + } +} + +U32U64 IREmitter::ShiftRightArithmetic(const U32U64& base, const U32& shift) { + switch (base.Type()) { + case Type::U32: + return Inst<U32>(Opcode::ShiftRightArithmetic32, base, shift); + case Type::U64: + return Inst<U64>(Opcode::ShiftRightArithmetic64, base, shift); + default: + ThrowInvalidType(base.Type()); + } +} + +U32 IREmitter::BitwiseAnd(const U32& a, const U32& b) { + return Inst<U32>(Opcode::BitwiseAnd32, a, b); +} + +U32 IREmitter::BitwiseOr(const U32& a, const U32& b) { + return Inst<U32>(Opcode::BitwiseOr32, a, b); +} + +U32 IREmitter::BitwiseXor(const U32& a, const U32& b) { + return Inst<U32>(Opcode::BitwiseXor32, a, b); +} + +U32 IREmitter::BitFieldInsert(const U32& base, const U32& insert, const U32& offset, + const U32& count) { + return Inst<U32>(Opcode::BitFieldInsert, base, insert, offset, count); +} + +U32 IREmitter::BitFieldExtract(const U32& base, const U32& offset, const U32& count, + bool is_signed) { + return Inst<U32>(is_signed ? Opcode::BitFieldSExtract : Opcode::BitFieldUExtract, base, offset, + count); +} + +U32 IREmitter::BitReverse(const U32& value) { + return Inst<U32>(Opcode::BitReverse32, value); +} + +U32 IREmitter::BitCount(const U32& value) { + return Inst<U32>(Opcode::BitCount32, value); +} + +U32 IREmitter::BitwiseNot(const U32& value) { + return Inst<U32>(Opcode::BitwiseNot32, value); +} + +U32 IREmitter::FindSMsb(const U32& value) { + return Inst<U32>(Opcode::FindSMsb32, value); +} + +U32 IREmitter::FindUMsb(const U32& value) { + return Inst<U32>(Opcode::FindUMsb32, value); +} + +U32 IREmitter::SMin(const U32& a, const U32& b) { + return Inst<U32>(Opcode::SMin32, a, b); +} + +U32 IREmitter::UMin(const U32& a, const U32& b) { + return Inst<U32>(Opcode::UMin32, a, b); +} + +U32 IREmitter::IMin(const U32& a, const U32& b, bool is_signed) { + return is_signed ? SMin(a, b) : UMin(a, b); +} + +U32 IREmitter::SMax(const U32& a, const U32& b) { + return Inst<U32>(Opcode::SMax32, a, b); +} + +U32 IREmitter::UMax(const U32& a, const U32& b) { + return Inst<U32>(Opcode::UMax32, a, b); +} + +U32 IREmitter::IMax(const U32& a, const U32& b, bool is_signed) { + return is_signed ? SMax(a, b) : UMax(a, b); +} + +U32 IREmitter::SClamp(const U32& value, const U32& min, const U32& max) { + return Inst<U32>(Opcode::SClamp32, value, min, max); +} + +U32 IREmitter::UClamp(const U32& value, const U32& min, const U32& max) { + return Inst<U32>(Opcode::UClamp32, value, min, max); +} + +U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) { + return Inst<U1>(is_signed ? 
Opcode::SLessThan : Opcode::ULessThan, lhs, rhs); +} + +U1 IREmitter::IEqual(const U32U64& lhs, const U32U64& rhs) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::U32: + return Inst<U1>(Opcode::IEqual, lhs, rhs); + case Type::U64: { + // Manually compare the unpacked values + const Value lhs_vector{UnpackUint2x32(lhs)}; + const Value rhs_vector{UnpackUint2x32(rhs)}; + return LogicalAnd(IEqual(IR::U32{CompositeExtract(lhs_vector, 0)}, + IR::U32{CompositeExtract(rhs_vector, 0)}), + IEqual(IR::U32{CompositeExtract(lhs_vector, 1)}, + IR::U32{CompositeExtract(rhs_vector, 1)})); + } + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed) { + return Inst<U1>(is_signed ? Opcode::SLessThanEqual : Opcode::ULessThanEqual, lhs, rhs); +} + +U1 IREmitter::IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed) { + return Inst<U1>(is_signed ? Opcode::SGreaterThan : Opcode::UGreaterThan, lhs, rhs); +} + +U1 IREmitter::INotEqual(const U32& lhs, const U32& rhs) { + return Inst<U1>(Opcode::INotEqual, lhs, rhs); +} + +U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed) { + return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs); +} + +U32 IREmitter::SharedAtomicIAdd(const U32& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::SharedAtomicIAdd32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicSMin(const U32& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::SharedAtomicSMin32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicUMin(const U32& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::SharedAtomicUMin32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed) { + return is_signed ? SharedAtomicSMin(pointer_offset, value) + : SharedAtomicUMin(pointer_offset, value); +} + +U32 IREmitter::SharedAtomicSMax(const U32& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::SharedAtomicSMax32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicUMax(const U32& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::SharedAtomicUMax32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed) { + return is_signed ? 
SharedAtomicSMax(pointer_offset, value) + : SharedAtomicUMax(pointer_offset, value); +} + +U32 IREmitter::SharedAtomicInc(const U32& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::SharedAtomicInc32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicDec(const U32& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::SharedAtomicDec32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicAnd(const U32& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::SharedAtomicAnd32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicOr(const U32& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::SharedAtomicOr32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicXor(const U32& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::SharedAtomicXor32, pointer_offset, value); +} + +U32U64 IREmitter::SharedAtomicExchange(const U32& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst<U32>(Opcode::SharedAtomicExchange32, pointer_offset, value); + case Type::U64: + return Inst<U64>(Opcode::SharedAtomicExchange64, pointer_offset, value); + default: + ThrowInvalidType(pointer_offset.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst<U32>(Opcode::GlobalAtomicIAdd32, pointer_offset, value); + case Type::U64: + return Inst<U64>(Opcode::GlobalAtomicIAdd64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst<U32>(Opcode::GlobalAtomicSMin32, pointer_offset, value); + case Type::U64: + return Inst<U64>(Opcode::GlobalAtomicSMin64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst<U32>(Opcode::GlobalAtomicUMin32, pointer_offset, value); + case Type::U64: + return Inst<U64>(Opcode::GlobalAtomicUMin64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value, bool is_signed) { + return is_signed ? GlobalAtomicSMin(pointer_offset, value) + : GlobalAtomicUMin(pointer_offset, value); +} + +U32U64 IREmitter::GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst<U32>(Opcode::GlobalAtomicSMax32, pointer_offset, value); + case Type::U64: + return Inst<U64>(Opcode::GlobalAtomicSMax64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst<U32>(Opcode::GlobalAtomicUMax32, pointer_offset, value); + case Type::U64: + return Inst<U64>(Opcode::GlobalAtomicUMax64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value, bool is_signed) { + return is_signed ? 
GlobalAtomicSMax(pointer_offset, value) + : GlobalAtomicUMax(pointer_offset, value); +} + +U32 IREmitter::GlobalAtomicInc(const U64& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::GlobalAtomicInc32, pointer_offset, value); +} + +U32 IREmitter::GlobalAtomicDec(const U64& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::GlobalAtomicDec32, pointer_offset, value); +} + +U32U64 IREmitter::GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst<U32>(Opcode::GlobalAtomicAnd32, pointer_offset, value); + case Type::U64: + return Inst<U64>(Opcode::GlobalAtomicAnd64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicOr(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst<U32>(Opcode::GlobalAtomicOr32, pointer_offset, value); + case Type::U64: + return Inst<U64>(Opcode::GlobalAtomicOr64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicXor(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst<U32>(Opcode::GlobalAtomicXor32, pointer_offset, value); + case Type::U64: + return Inst<U64>(Opcode::GlobalAtomicXor64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst<U32>(Opcode::GlobalAtomicExchange32, pointer_offset, value); + case Type::U64: + return Inst<U64>(Opcode::GlobalAtomicExchange64, pointer_offset, value); + default: + ThrowInvalidType(pointer_offset.Type()); + } +} + +F32 IREmitter::GlobalAtomicF32Add(const U64& pointer_offset, const Value& value, + const FpControl control) { + return Inst<F32>(Opcode::GlobalAtomicAddF32, Flags{control}, pointer_offset, value); +} + +Value IREmitter::GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value, + const FpControl control) { + return Inst(Opcode::GlobalAtomicAddF16x2, Flags{control}, pointer_offset, value); +} + +Value IREmitter::GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value, + const FpControl control) { + return Inst(Opcode::GlobalAtomicMinF16x2, Flags{control}, pointer_offset, value); +} + +Value IREmitter::GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value, + const FpControl control) { + return Inst(Opcode::GlobalAtomicMaxF16x2, Flags{control}, pointer_offset, value); +} + +U1 IREmitter::LogicalOr(const U1& a, const U1& b) { + return Inst<U1>(Opcode::LogicalOr, a, b); +} + +U1 IREmitter::LogicalAnd(const U1& a, const U1& b) { + return Inst<U1>(Opcode::LogicalAnd, a, b); +} + +U1 IREmitter::LogicalXor(const U1& a, const U1& b) { + return Inst<U1>(Opcode::LogicalXor, a, b); +} + +U1 IREmitter::LogicalNot(const U1& value) { + return Inst<U1>(Opcode::LogicalNot, value); +} + +U32U64 IREmitter::ConvertFToS(size_t bitsize, const F16F32F64& value) { + switch (bitsize) { + case 16: + switch (value.Type()) { + case Type::F16: + return Inst<U32>(Opcode::ConvertS16F16, value); + case Type::F32: + return Inst<U32>(Opcode::ConvertS16F32, value); + case Type::F64: + return Inst<U32>(Opcode::ConvertS16F64, value); + default: + ThrowInvalidType(value.Type()); + } + case 32: + switch (value.Type()) { + case Type::F16: + return Inst<U32>(Opcode::ConvertS32F16, value); + case Type::F32: + return Inst<U32>(Opcode::ConvertS32F32, 
value); + case Type::F64: + return Inst<U32>(Opcode::ConvertS32F64, value); + default: + ThrowInvalidType(value.Type()); + } + case 64: + switch (value.Type()) { + case Type::F16: + return Inst<U64>(Opcode::ConvertS64F16, value); + case Type::F32: + return Inst<U64>(Opcode::ConvertS64F32, value); + case Type::F64: + return Inst<U64>(Opcode::ConvertS64F64, value); + default: + ThrowInvalidType(value.Type()); + } + default: + throw InvalidArgument("Invalid destination bitsize {}", bitsize); + } +} + +U32U64 IREmitter::ConvertFToU(size_t bitsize, const F16F32F64& value) { + switch (bitsize) { + case 16: + switch (value.Type()) { + case Type::F16: + return Inst<U32>(Opcode::ConvertU16F16, value); + case Type::F32: + return Inst<U32>(Opcode::ConvertU16F32, value); + case Type::F64: + return Inst<U32>(Opcode::ConvertU16F64, value); + default: + ThrowInvalidType(value.Type()); + } + case 32: + switch (value.Type()) { + case Type::F16: + return Inst<U32>(Opcode::ConvertU32F16, value); + case Type::F32: + return Inst<U32>(Opcode::ConvertU32F32, value); + case Type::F64: + return Inst<U32>(Opcode::ConvertU32F64, value); + default: + ThrowInvalidType(value.Type()); + } + case 64: + switch (value.Type()) { + case Type::F16: + return Inst<U64>(Opcode::ConvertU64F16, value); + case Type::F32: + return Inst<U64>(Opcode::ConvertU64F32, value); + case Type::F64: + return Inst<U64>(Opcode::ConvertU64F64, value); + default: + ThrowInvalidType(value.Type()); + } + default: + throw InvalidArgument("Invalid destination bitsize {}", bitsize); + } +} + +U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value) { + return is_signed ? ConvertFToS(bitsize, value) : ConvertFToU(bitsize, value); +} + +F16F32F64 IREmitter::ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value, + FpControl control) { + switch (dest_bitsize) { + case 16: + switch (src_bitsize) { + case 8: + return Inst<F16>(Opcode::ConvertF16S8, Flags{control}, value); + case 16: + return Inst<F16>(Opcode::ConvertF16S16, Flags{control}, value); + case 32: + return Inst<F16>(Opcode::ConvertF16S32, Flags{control}, value); + case 64: + return Inst<F16>(Opcode::ConvertF16S64, Flags{control}, value); + } + break; + case 32: + switch (src_bitsize) { + case 8: + return Inst<F32>(Opcode::ConvertF32S8, Flags{control}, value); + case 16: + return Inst<F32>(Opcode::ConvertF32S16, Flags{control}, value); + case 32: + return Inst<F32>(Opcode::ConvertF32S32, Flags{control}, value); + case 64: + return Inst<F32>(Opcode::ConvertF32S64, Flags{control}, value); + } + break; + case 64: + switch (src_bitsize) { + case 8: + return Inst<F64>(Opcode::ConvertF64S8, Flags{control}, value); + case 16: + return Inst<F64>(Opcode::ConvertF64S16, Flags{control}, value); + case 32: + return Inst<F64>(Opcode::ConvertF64S32, Flags{control}, value); + case 64: + return Inst<F64>(Opcode::ConvertF64S64, Flags{control}, value); + } + break; + } + throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize); +} + +F16F32F64 IREmitter::ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value, + FpControl control) { + switch (dest_bitsize) { + case 16: + switch (src_bitsize) { + case 8: + return Inst<F16>(Opcode::ConvertF16U8, Flags{control}, value); + case 16: + return Inst<F16>(Opcode::ConvertF16U16, Flags{control}, value); + case 32: + return Inst<F16>(Opcode::ConvertF16U32, Flags{control}, value); + case 64: + return Inst<F16>(Opcode::ConvertF16U64, Flags{control}, value); + } + break; + case 32: 
+ switch (src_bitsize) { + case 8: + return Inst<F32>(Opcode::ConvertF32U8, Flags{control}, value); + case 16: + return Inst<F32>(Opcode::ConvertF32U16, Flags{control}, value); + case 32: + return Inst<F32>(Opcode::ConvertF32U32, Flags{control}, value); + case 64: + return Inst<F32>(Opcode::ConvertF32U64, Flags{control}, value); + } + break; + case 64: + switch (src_bitsize) { + case 8: + return Inst<F64>(Opcode::ConvertF64U8, Flags{control}, value); + case 16: + return Inst<F64>(Opcode::ConvertF64U16, Flags{control}, value); + case 32: + return Inst<F64>(Opcode::ConvertF64U32, Flags{control}, value); + case 64: + return Inst<F64>(Opcode::ConvertF64U64, Flags{control}, value); + } + break; + } + throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize); +} + +F16F32F64 IREmitter::ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed, + const Value& value, FpControl control) { + return is_signed ? ConvertSToF(dest_bitsize, src_bitsize, value, control) + : ConvertUToF(dest_bitsize, src_bitsize, value, control); +} + +U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) { + switch (result_bitsize) { + case 32: + switch (value.Type()) { + case Type::U32: + // Nothing to do + return value; + case Type::U64: + return Inst<U32>(Opcode::ConvertU32U64, value); + default: + break; + } + break; + case 64: + switch (value.Type()) { + case Type::U32: + return Inst<U64>(Opcode::ConvertU64U32, value); + case Type::U64: + // Nothing to do + return value; + default: + break; + } + } + throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); +} + +F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value, FpControl control) { + switch (result_bitsize) { + case 16: + switch (value.Type()) { + case Type::F16: + // Nothing to do + return value; + case Type::F32: + return Inst<F16>(Opcode::ConvertF16F32, Flags{control}, value); + case Type::F64: + throw LogicError("Illegal conversion from F64 to F16"); + default: + break; + } + break; + case 32: + switch (value.Type()) { + case Type::F16: + return Inst<F32>(Opcode::ConvertF32F16, Flags{control}, value); + case Type::F32: + // Nothing to do + return value; + case Type::F64: + return Inst<F32>(Opcode::ConvertF32F64, Flags{control}, value); + default: + break; + } + break; + case 64: + switch (value.Type()) { + case Type::F16: + throw LogicError("Illegal conversion from F16 to F64"); + case Type::F32: + return Inst<F64>(Opcode::ConvertF64F32, Flags{control}, value); + case Type::F64: + // Nothing to do + return value; + default: + break; + } + break; + } + throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); +} + +Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias, + const Value& offset, const F32& lod_clamp, + TextureInstInfo info) { + const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)}; + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleImplicitLod + : Opcode::BindlessImageSampleImplicitLod}; + return Inst(op, Flags{info}, handle, coords, bias_lc, offset); +} + +Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& coords, const F32& lod, + const Value& offset, TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? 
Opcode::BoundImageSampleExplicitLod + : Opcode::BindlessImageSampleExplicitLod}; + return Inst(op, Flags{info}, handle, coords, lod, offset); +} + +F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, const F32& dref, + const F32& bias, const Value& offset, + const F32& lod_clamp, TextureInstInfo info) { + const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)}; + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefImplicitLod + : Opcode::BindlessImageSampleDrefImplicitLod}; + return Inst<F32>(op, Flags{info}, handle, coords, dref, bias_lc, offset); +} + +F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, const F32& dref, + const F32& lod, const Value& offset, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefExplicitLod + : Opcode::BindlessImageSampleDrefExplicitLod}; + return Inst<F32>(op, Flags{info}, handle, coords, dref, lod, offset); +} + +Value IREmitter::ImageGather(const Value& handle, const Value& coords, const Value& offset, + const Value& offset2, TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGather : Opcode::BindlessImageGather}; + return Inst(op, Flags{info}, handle, coords, offset, offset2); +} + +Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const Value& offset, + const Value& offset2, const F32& dref, TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGatherDref + : Opcode::BindlessImageGatherDref}; + return Inst(op, Flags{info}, handle, coords, offset, offset2, dref); +} + +Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const Value& offset, + const U32& lod, const U32& multisampling, TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageFetch : Opcode::BindlessImageFetch}; + return Inst(op, Flags{info}, handle, coords, offset, lod, multisampling); +} + +Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageQueryDimensions + : Opcode::BindlessImageQueryDimensions}; + return Inst(op, handle, lod); +} + +Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords, TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageQueryLod + : Opcode::BindlessImageQueryLod}; + return Inst(op, Flags{info}, handle, coords); +} + +Value IREmitter::ImageGradient(const Value& handle, const Value& coords, const Value& derivates, + const Value& offset, const F32& lod_clamp, TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGradient + : Opcode::BindlessImageGradient}; + return Inst(op, Flags{info}, handle, coords, derivates, offset, lod_clamp); +} + +Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageRead : Opcode::BindlessImageRead}; + return Inst(op, Flags{info}, handle, coords); +} + +void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value& color, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageWrite : Opcode::BindlessImageWrite}; + Inst(op, Flags{info}, handle, coords, color); +} + +Value IREmitter::ImageAtomicIAdd(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? 
Opcode::BoundImageAtomicIAdd32 + : Opcode::BindlessImageAtomicIAdd32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicSMin(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicSMin32 + : Opcode::BindlessImageAtomicSMin32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicUMin(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicUMin32 + : Opcode::BindlessImageAtomicUMin32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicIMin(const Value& handle, const Value& coords, const Value& value, + bool is_signed, TextureInstInfo info) { + return is_signed ? ImageAtomicSMin(handle, coords, value, info) + : ImageAtomicUMin(handle, coords, value, info); +} + +Value IREmitter::ImageAtomicSMax(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicSMax32 + : Opcode::BindlessImageAtomicSMax32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicUMax(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicUMax32 + : Opcode::BindlessImageAtomicUMax32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicIMax(const Value& handle, const Value& coords, const Value& value, + bool is_signed, TextureInstInfo info) { + return is_signed ? ImageAtomicSMax(handle, coords, value, info) + : ImageAtomicUMax(handle, coords, value, info); +} + +Value IREmitter::ImageAtomicInc(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicInc32 + : Opcode::BindlessImageAtomicInc32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicDec(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicDec32 + : Opcode::BindlessImageAtomicDec32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicAnd32 + : Opcode::BindlessImageAtomicAnd32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicOr(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicOr32 + : Opcode::BindlessImageAtomicOr32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicXor(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicXor32 + : Opcode::BindlessImageAtomicXor32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? 
Opcode::BoundImageAtomicExchange32 + : Opcode::BindlessImageAtomicExchange32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +U1 IREmitter::VoteAll(const U1& value) { + return Inst<U1>(Opcode::VoteAll, value); +} + +U1 IREmitter::VoteAny(const U1& value) { + return Inst<U1>(Opcode::VoteAny, value); +} + +U1 IREmitter::VoteEqual(const U1& value) { + return Inst<U1>(Opcode::VoteEqual, value); +} + +U32 IREmitter::SubgroupBallot(const U1& value) { + return Inst<U32>(Opcode::SubgroupBallot, value); +} + +U32 IREmitter::SubgroupEqMask() { + return Inst<U32>(Opcode::SubgroupEqMask); +} + +U32 IREmitter::SubgroupLtMask() { + return Inst<U32>(Opcode::SubgroupLtMask); +} + +U32 IREmitter::SubgroupLeMask() { + return Inst<U32>(Opcode::SubgroupLeMask); +} + +U32 IREmitter::SubgroupGtMask() { + return Inst<U32>(Opcode::SubgroupGtMask); +} + +U32 IREmitter::SubgroupGeMask() { + return Inst<U32>(Opcode::SubgroupGeMask); +} + +U32 IREmitter::ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, + const IR::U32& seg_mask) { + return Inst<U32>(Opcode::ShuffleIndex, value, index, clamp, seg_mask); +} + +U32 IREmitter::ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, + const IR::U32& seg_mask) { + return Inst<U32>(Opcode::ShuffleUp, value, index, clamp, seg_mask); +} + +U32 IREmitter::ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, + const IR::U32& seg_mask) { + return Inst<U32>(Opcode::ShuffleDown, value, index, clamp, seg_mask); +} + +U32 IREmitter::ShuffleButterfly(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, + const IR::U32& seg_mask) { + return Inst<U32>(Opcode::ShuffleButterfly, value, index, clamp, seg_mask); +} + +F32 IREmitter::FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, FpControl control) { + return Inst<F32>(Opcode::FSwizzleAdd, Flags{control}, a, b, swizzle); +} + +F32 IREmitter::DPdxFine(const F32& a) { + return Inst<F32>(Opcode::DPdxFine, a); +} + +F32 IREmitter::DPdyFine(const F32& a) { + return Inst<F32>(Opcode::DPdyFine, a); +} + +F32 IREmitter::DPdxCoarse(const F32& a) { + return Inst<F32>(Opcode::DPdxCoarse, a); +} + +F32 IREmitter::DPdyCoarse(const F32& a) { + return Inst<F32>(Opcode::DPdyCoarse, a); +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h new file mode 100644 index 000000000..53f7b3b06 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -0,0 +1,413 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <cstring> +#include <type_traits> + +#include "shader_recompiler/frontend/ir/attribute.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::IR { + +class IREmitter { +public: + explicit IREmitter(Block& block_) : block{&block_}, insertion_point{block->end()} {} + explicit IREmitter(Block& block_, Block::iterator insertion_point_) + : block{&block_}, insertion_point{insertion_point_} {} + + Block* block; + + [[nodiscard]] U1 Imm1(bool value) const; + [[nodiscard]] U8 Imm8(u8 value) const; + [[nodiscard]] U16 Imm16(u16 value) const; + [[nodiscard]] U32 Imm32(u32 value) const; + [[nodiscard]] U32 Imm32(s32 value) const; + [[nodiscard]] F32 Imm32(f32 value) const; + [[nodiscard]] U64 Imm64(u64 value) const; + [[nodiscard]] U64 Imm64(s64 value) const; + [[nodiscard]] F64 Imm64(f64 value) const; + + U1 ConditionRef(const U1& value); + void Reference(const Value& value); + + void PhiMove(IR::Inst& phi, const Value& value); + + void Prologue(); + void Epilogue(); + void DemoteToHelperInvocation(); + void EmitVertex(const U32& stream); + void EndPrimitive(const U32& stream); + + [[nodiscard]] U32 GetReg(IR::Reg reg); + void SetReg(IR::Reg reg, const U32& value); + + [[nodiscard]] U1 GetPred(IR::Pred pred, bool is_negated = false); + void SetPred(IR::Pred pred, const U1& value); + + [[nodiscard]] U1 GetGotoVariable(u32 id); + void SetGotoVariable(u32 id, const U1& value); + + [[nodiscard]] U32 GetIndirectBranchVariable(); + void SetIndirectBranchVariable(const U32& value); + + [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset); + [[nodiscard]] Value GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize, + bool is_signed); + [[nodiscard]] F32 GetFloatCbuf(const U32& binding, const U32& byte_offset); + + [[nodiscard]] U1 GetZFlag(); + [[nodiscard]] U1 GetSFlag(); + [[nodiscard]] U1 GetCFlag(); + [[nodiscard]] U1 GetOFlag(); + + void SetZFlag(const U1& value); + void SetSFlag(const U1& value); + void SetCFlag(const U1& value); + void SetOFlag(const U1& value); + + [[nodiscard]] U1 Condition(IR::Condition cond); + [[nodiscard]] U1 GetFlowTestResult(FlowTest test); + + [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); + [[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex); + void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex); + + [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address); + [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address, const U32& vertex); + void SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex); + + [[nodiscard]] F32 GetPatch(Patch patch); + void SetPatch(Patch patch, const F32& value); + + void SetFragColor(u32 index, u32 component, const F32& value); + void SetSampleMask(const U32& value); + void SetFragDepth(const F32& value); + + [[nodiscard]] U32 WorkgroupIdX(); + [[nodiscard]] U32 WorkgroupIdY(); + [[nodiscard]] U32 WorkgroupIdZ(); + + [[nodiscard]] Value LocalInvocationId(); + [[nodiscard]] U32 LocalInvocationIdX(); + [[nodiscard]] U32 LocalInvocationIdY(); + [[nodiscard]] U32 LocalInvocationIdZ(); + + [[nodiscard]] U32 InvocationId(); + [[nodiscard]] U32 SampleId(); + [[nodiscard]] U1 IsHelperInvocation(); + [[nodiscard]] F32 YDirection(); + + [[nodiscard]] U32 LaneId(); + + [[nodiscard]] U32 LoadGlobalU8(const U64& address); + [[nodiscard]] U32 LoadGlobalS8(const U64& address); + 
[[nodiscard]] U32 LoadGlobalU16(const U64& address); + [[nodiscard]] U32 LoadGlobalS16(const U64& address); + [[nodiscard]] U32 LoadGlobal32(const U64& address); + [[nodiscard]] Value LoadGlobal64(const U64& address); + [[nodiscard]] Value LoadGlobal128(const U64& address); + + void WriteGlobalU8(const U64& address, const U32& value); + void WriteGlobalS8(const U64& address, const U32& value); + void WriteGlobalU16(const U64& address, const U32& value); + void WriteGlobalS16(const U64& address, const U32& value); + void WriteGlobal32(const U64& address, const U32& value); + void WriteGlobal64(const U64& address, const IR::Value& vector); + void WriteGlobal128(const U64& address, const IR::Value& vector); + + [[nodiscard]] U32 LoadLocal(const U32& word_offset); + void WriteLocal(const U32& word_offset, const U32& value); + + [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset); + void WriteShared(int bit_size, const U32& offset, const Value& value); + + [[nodiscard]] U1 GetZeroFromOp(const Value& op); + [[nodiscard]] U1 GetSignFromOp(const Value& op); + [[nodiscard]] U1 GetCarryFromOp(const Value& op); + [[nodiscard]] U1 GetOverflowFromOp(const Value& op); + [[nodiscard]] U1 GetSparseFromOp(const Value& op); + [[nodiscard]] U1 GetInBoundsFromOp(const Value& op); + + [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); + [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); + [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3, + const Value& e4); + [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element); + [[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element); + + [[nodiscard]] Value Select(const U1& condition, const Value& true_value, + const Value& false_value); + + void Barrier(); + void WorkgroupMemoryBarrier(); + void DeviceMemoryBarrier(); + + template <typename Dest, typename Source> + [[nodiscard]] Dest BitCast(const Source& value); + + [[nodiscard]] U64 PackUint2x32(const Value& vector); + [[nodiscard]] Value UnpackUint2x32(const U64& value); + + [[nodiscard]] U32 PackFloat2x16(const Value& vector); + [[nodiscard]] Value UnpackFloat2x16(const U32& value); + + [[nodiscard]] U32 PackHalf2x16(const Value& vector); + [[nodiscard]] Value UnpackHalf2x16(const U32& value); + + [[nodiscard]] F64 PackDouble2x32(const Value& vector); + [[nodiscard]] Value UnpackDouble2x32(const F64& value); + + [[nodiscard]] F16F32F64 FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control = {}); + [[nodiscard]] F16F32F64 FPMul(const F16F32F64& a, const F16F32F64& b, FpControl control = {}); + [[nodiscard]] F16F32F64 FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F64& c, + FpControl control = {}); + + [[nodiscard]] F16F32F64 FPAbs(const F16F32F64& value); + [[nodiscard]] F16F32F64 FPNeg(const F16F32F64& value); + [[nodiscard]] F16F32F64 FPAbsNeg(const F16F32F64& value, bool abs, bool neg); + + [[nodiscard]] F32 FPCos(const F32& value); + [[nodiscard]] F32 FPSin(const F32& value); + [[nodiscard]] F32 FPExp2(const F32& value); + [[nodiscard]] F32 FPLog2(const F32& value); + [[nodiscard]] F32F64 FPRecip(const F32F64& value); + [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value); + [[nodiscard]] F32 FPSqrt(const F32& value); + [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value); + [[nodiscard]] F16F32F64 FPClamp(const F16F32F64& value, const F16F32F64& min_value, + const F16F32F64& max_value); + 
[[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {}); + [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {}); + [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {}); + [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value, FpControl control = {}); + + [[nodiscard]] U1 FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {}, + bool ordered = true); + [[nodiscard]] U1 FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {}, + bool ordered = true); + [[nodiscard]] U1 FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {}, + bool ordered = true); + [[nodiscard]] U1 FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, + FpControl control = {}, bool ordered = true); + [[nodiscard]] U1 FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, + FpControl control = {}, bool ordered = true); + [[nodiscard]] U1 FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, + FpControl control = {}, bool ordered = true); + [[nodiscard]] U1 FPIsNan(const F16F32F64& value); + [[nodiscard]] U1 FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs); + [[nodiscard]] U1 FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs); + [[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control = {}); + [[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control = {}); + + [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); + [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); + [[nodiscard]] U32 IMul(const U32& a, const U32& b); + [[nodiscard]] U32U64 INeg(const U32U64& value); + [[nodiscard]] U32 IAbs(const U32& value); + [[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift); + [[nodiscard]] U32U64 ShiftRightLogical(const U32U64& base, const U32& shift); + [[nodiscard]] U32U64 ShiftRightArithmetic(const U32U64& base, const U32& shift); + [[nodiscard]] U32 BitwiseAnd(const U32& a, const U32& b); + [[nodiscard]] U32 BitwiseOr(const U32& a, const U32& b); + [[nodiscard]] U32 BitwiseXor(const U32& a, const U32& b); + [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset, + const U32& count); + [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count, + bool is_signed = false); + [[nodiscard]] U32 BitReverse(const U32& value); + [[nodiscard]] U32 BitCount(const U32& value); + [[nodiscard]] U32 BitwiseNot(const U32& value); + + [[nodiscard]] U32 FindSMsb(const U32& value); + [[nodiscard]] U32 FindUMsb(const U32& value); + [[nodiscard]] U32 SMin(const U32& a, const U32& b); + [[nodiscard]] U32 UMin(const U32& a, const U32& b); + [[nodiscard]] U32 IMin(const U32& a, const U32& b, bool is_signed); + [[nodiscard]] U32 SMax(const U32& a, const U32& b); + [[nodiscard]] U32 UMax(const U32& a, const U32& b); + [[nodiscard]] U32 IMax(const U32& a, const U32& b, bool is_signed); + [[nodiscard]] U32 SClamp(const U32& value, const U32& min, const U32& max); + [[nodiscard]] U32 UClamp(const U32& value, const U32& min, const U32& max); + + [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed); + [[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs); + [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed); + [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed); + [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs); + [[nodiscard]] U1 
IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed); + + [[nodiscard]] U32 SharedAtomicIAdd(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicSMin(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicUMin(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed); + [[nodiscard]] U32 SharedAtomicSMax(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicUMax(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed); + [[nodiscard]] U32 SharedAtomicInc(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicDec(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicAnd(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicOr(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicXor(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32U64 SharedAtomicExchange(const U32& pointer_offset, const U32U64& value); + + [[nodiscard]] U32U64 GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value, + bool is_signed); + [[nodiscard]] U32U64 GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value, + bool is_signed); + [[nodiscard]] U32 GlobalAtomicInc(const U64& pointer_offset, const U32& value); + [[nodiscard]] U32 GlobalAtomicDec(const U64& pointer_offset, const U32& value); + [[nodiscard]] U32U64 GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicOr(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicXor(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value); + + [[nodiscard]] F32 GlobalAtomicF32Add(const U64& pointer_offset, const Value& value, + const FpControl control = {}); + [[nodiscard]] Value GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value, + const FpControl control = {}); + [[nodiscard]] Value GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value, + const FpControl control = {}); + [[nodiscard]] Value GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value, + const FpControl control = {}); + + [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b); + [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b); + [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b); + [[nodiscard]] U1 LogicalNot(const U1& value); + + [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value); + [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value); + [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value); + [[nodiscard]] F16F32F64 ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value, + FpControl control = {}); + [[nodiscard]] F16F32F64 ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value, + 
FpControl control = {});
+    [[nodiscard]] F16F32F64 ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed,
+                                        const Value& value, FpControl control = {});
+
+    [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value);
+    [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value,
+                                      FpControl control = {});
+
+    [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords,
+                                               const F32& bias, const Value& offset,
+                                               const F32& lod_clamp, TextureInstInfo info);
+    [[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& coords,
+                                               const F32& lod, const Value& offset,
+                                               TextureInstInfo info);
+    [[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& coords,
+                                                 const F32& dref, const F32& bias,
+                                                 const Value& offset, const F32& lod_clamp,
+                                                 TextureInstInfo info);
+    [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords,
+                                                 const F32& dref, const F32& lod,
+                                                 const Value& offset, TextureInstInfo info);
+    [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod);
+
+    [[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords,
+                                      TextureInstInfo info);
+    [[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset,
+                                    const Value& offset2, TextureInstInfo info);
+    [[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords,
+                                        const Value& offset, const Value& offset2, const F32& dref,
+                                        TextureInstInfo info);
+    [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset,
+                                   const U32& lod, const U32& multisampling, TextureInstInfo info);
+    [[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords,
+                                      const Value& derivates, const Value& offset,
+                                      const F32& lod_clamp, TextureInstInfo info);
+    [[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, TextureInstInfo info);
+    void ImageWrite(const Value& handle, const Value& coords, const Value& color,
+                    TextureInstInfo info);
+
+    [[nodiscard]] Value ImageAtomicIAdd(const Value& handle, const Value& coords,
+                                        const Value& value, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicSMin(const Value& handle, const Value& coords,
+                                        const Value& value, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicUMin(const Value& handle, const Value& coords,
+                                        const Value& value, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicIMin(const Value& handle, const Value& coords,
+                                        const Value& value, bool is_signed, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicSMax(const Value& handle, const Value& coords,
+                                        const Value& value, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicUMax(const Value& handle, const Value& coords,
+                                        const Value& value, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicIMax(const Value& handle, const Value& coords,
+                                        const Value& value, bool is_signed, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicInc(const Value& handle, const Value& coords, const Value& value,
+                                       TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicDec(const Value& handle, const Value& coords, const Value& value,
+                                       TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value,
+                                       TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicOr(const Value& handle, const Value& coords, const Value& value,
+                                      TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicXor(const Value&
handle, const Value& coords, const Value& value, + TextureInstInfo info); + [[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords, + const Value& value, TextureInstInfo info); + [[nodiscard]] U1 VoteAll(const U1& value); + [[nodiscard]] U1 VoteAny(const U1& value); + [[nodiscard]] U1 VoteEqual(const U1& value); + [[nodiscard]] U32 SubgroupBallot(const U1& value); + [[nodiscard]] U32 SubgroupEqMask(); + [[nodiscard]] U32 SubgroupLtMask(); + [[nodiscard]] U32 SubgroupLeMask(); + [[nodiscard]] U32 SubgroupGtMask(); + [[nodiscard]] U32 SubgroupGeMask(); + [[nodiscard]] U32 ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, + const IR::U32& seg_mask); + [[nodiscard]] U32 ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, + const IR::U32& seg_mask); + [[nodiscard]] U32 ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, + const IR::U32& seg_mask); + [[nodiscard]] U32 ShuffleButterfly(const IR::U32& value, const IR::U32& index, + const IR::U32& clamp, const IR::U32& seg_mask); + [[nodiscard]] F32 FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, + FpControl control = {}); + + [[nodiscard]] F32 DPdxFine(const F32& a); + + [[nodiscard]] F32 DPdyFine(const F32& a); + + [[nodiscard]] F32 DPdxCoarse(const F32& a); + + [[nodiscard]] F32 DPdyCoarse(const F32& a); + +private: + IR::Block::iterator insertion_point; + + template <typename T = Value, typename... Args> + T Inst(Opcode op, Args... args) { + auto it{block->PrependNewInst(insertion_point, op, {Value{args}...})}; + return T{Value{&*it}}; + } + + template <typename T> + requires(sizeof(T) <= sizeof(u32) && std::is_trivially_copyable_v<T>) struct Flags { + Flags() = default; + Flags(T proxy_) : proxy{proxy_} {} + + T proxy; + }; + + template <typename T = Value, typename FlagType, typename... Args> + T Inst(Opcode op, Flags<FlagType> flags, Args... args) { + u32 raw_flags{}; + std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy)); + auto it{block->PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)}; + return T{Value{&*it}}; + } +}; + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp new file mode 100644 index 000000000..3dfa5a880 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -0,0 +1,411 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
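+// Inst implements the value and use bookkeeping behind the emitter: Use() and
+// UndoUse() maintain use counts, and pseudo-instructions such as GetZeroFromOp
+// attach themselves to the instruction that produced their argument so that
+// backends can later retrieve them with GetAssociatedPseudoOperation. A
+// hypothetical sketch, assuming an existing IREmitter `ir` and U32 values `a`
+// and `b`:
+//
+//     const IR::U32 sum{ir.IAdd(a, b)};
+//     const IR::U1 is_zero{ir.GetZeroFromOp(sum)}; // recorded on sum's defining Inst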
+ +#include <algorithm> +#include <memory> + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/type.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::IR { +namespace { +void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) { + if (inst && inst->GetOpcode() != opcode) { + throw LogicError("Invalid pseudo-instruction"); + } +} + +void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) { + if (dest_inst) { + throw LogicError("Only one of each type of pseudo-op allowed"); + } + dest_inst = pseudo_inst; +} + +void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) { + if (inst->GetOpcode() != expected_opcode) { + throw LogicError("Undoing use of invalid pseudo-op"); + } + inst = nullptr; +} + +void AllocAssociatedInsts(std::unique_ptr<AssociatedInsts>& associated_insts) { + if (!associated_insts) { + associated_insts = std::make_unique<AssociatedInsts>(); + } +} +} // Anonymous namespace + +Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} { + if (op == Opcode::Phi) { + std::construct_at(&phi_args); + } else { + std::construct_at(&args); + } +} + +Inst::~Inst() { + if (op == Opcode::Phi) { + std::destroy_at(&phi_args); + } else { + std::destroy_at(&args); + } +} + +bool Inst::MayHaveSideEffects() const noexcept { + switch (op) { + case Opcode::ConditionRef: + case Opcode::Reference: + case Opcode::PhiMove: + case Opcode::Prologue: + case Opcode::Epilogue: + case Opcode::Join: + case Opcode::DemoteToHelperInvocation: + case Opcode::Barrier: + case Opcode::WorkgroupMemoryBarrier: + case Opcode::DeviceMemoryBarrier: + case Opcode::EmitVertex: + case Opcode::EndPrimitive: + case Opcode::SetAttribute: + case Opcode::SetAttributeIndexed: + case Opcode::SetPatch: + case Opcode::SetFragColor: + case Opcode::SetSampleMask: + case Opcode::SetFragDepth: + case Opcode::WriteGlobalU8: + case Opcode::WriteGlobalS8: + case Opcode::WriteGlobalU16: + case Opcode::WriteGlobalS16: + case Opcode::WriteGlobal32: + case Opcode::WriteGlobal64: + case Opcode::WriteGlobal128: + case Opcode::WriteStorageU8: + case Opcode::WriteStorageS8: + case Opcode::WriteStorageU16: + case Opcode::WriteStorageS16: + case Opcode::WriteStorage32: + case Opcode::WriteStorage64: + case Opcode::WriteStorage128: + case Opcode::WriteLocal: + case Opcode::WriteSharedU8: + case Opcode::WriteSharedU16: + case Opcode::WriteSharedU32: + case Opcode::WriteSharedU64: + case Opcode::WriteSharedU128: + case Opcode::SharedAtomicIAdd32: + case Opcode::SharedAtomicSMin32: + case Opcode::SharedAtomicUMin32: + case Opcode::SharedAtomicSMax32: + case Opcode::SharedAtomicUMax32: + case Opcode::SharedAtomicInc32: + case Opcode::SharedAtomicDec32: + case Opcode::SharedAtomicAnd32: + case Opcode::SharedAtomicOr32: + case Opcode::SharedAtomicXor32: + case Opcode::SharedAtomicExchange32: + case Opcode::SharedAtomicExchange64: + case Opcode::GlobalAtomicIAdd32: + case Opcode::GlobalAtomicSMin32: + case Opcode::GlobalAtomicUMin32: + case Opcode::GlobalAtomicSMax32: + case Opcode::GlobalAtomicUMax32: + case Opcode::GlobalAtomicInc32: + case Opcode::GlobalAtomicDec32: + case Opcode::GlobalAtomicAnd32: + case Opcode::GlobalAtomicOr32: + case Opcode::GlobalAtomicXor32: + case Opcode::GlobalAtomicExchange32: + case Opcode::GlobalAtomicIAdd64: + case Opcode::GlobalAtomicSMin64: + case Opcode::GlobalAtomicUMin64: + case Opcode::GlobalAtomicSMax64: + case Opcode::GlobalAtomicUMax64: + case Opcode::GlobalAtomicAnd64: + case Opcode::GlobalAtomicOr64: + 
case Opcode::GlobalAtomicXor64: + case Opcode::GlobalAtomicExchange64: + case Opcode::GlobalAtomicAddF32: + case Opcode::GlobalAtomicAddF16x2: + case Opcode::GlobalAtomicAddF32x2: + case Opcode::GlobalAtomicMinF16x2: + case Opcode::GlobalAtomicMinF32x2: + case Opcode::GlobalAtomicMaxF16x2: + case Opcode::GlobalAtomicMaxF32x2: + case Opcode::StorageAtomicIAdd32: + case Opcode::StorageAtomicSMin32: + case Opcode::StorageAtomicUMin32: + case Opcode::StorageAtomicSMax32: + case Opcode::StorageAtomicUMax32: + case Opcode::StorageAtomicInc32: + case Opcode::StorageAtomicDec32: + case Opcode::StorageAtomicAnd32: + case Opcode::StorageAtomicOr32: + case Opcode::StorageAtomicXor32: + case Opcode::StorageAtomicExchange32: + case Opcode::StorageAtomicIAdd64: + case Opcode::StorageAtomicSMin64: + case Opcode::StorageAtomicUMin64: + case Opcode::StorageAtomicSMax64: + case Opcode::StorageAtomicUMax64: + case Opcode::StorageAtomicAnd64: + case Opcode::StorageAtomicOr64: + case Opcode::StorageAtomicXor64: + case Opcode::StorageAtomicExchange64: + case Opcode::StorageAtomicAddF32: + case Opcode::StorageAtomicAddF16x2: + case Opcode::StorageAtomicAddF32x2: + case Opcode::StorageAtomicMinF16x2: + case Opcode::StorageAtomicMinF32x2: + case Opcode::StorageAtomicMaxF16x2: + case Opcode::StorageAtomicMaxF32x2: + case Opcode::BindlessImageWrite: + case Opcode::BoundImageWrite: + case Opcode::ImageWrite: + case IR::Opcode::BindlessImageAtomicIAdd32: + case IR::Opcode::BindlessImageAtomicSMin32: + case IR::Opcode::BindlessImageAtomicUMin32: + case IR::Opcode::BindlessImageAtomicSMax32: + case IR::Opcode::BindlessImageAtomicUMax32: + case IR::Opcode::BindlessImageAtomicInc32: + case IR::Opcode::BindlessImageAtomicDec32: + case IR::Opcode::BindlessImageAtomicAnd32: + case IR::Opcode::BindlessImageAtomicOr32: + case IR::Opcode::BindlessImageAtomicXor32: + case IR::Opcode::BindlessImageAtomicExchange32: + case IR::Opcode::BoundImageAtomicIAdd32: + case IR::Opcode::BoundImageAtomicSMin32: + case IR::Opcode::BoundImageAtomicUMin32: + case IR::Opcode::BoundImageAtomicSMax32: + case IR::Opcode::BoundImageAtomicUMax32: + case IR::Opcode::BoundImageAtomicInc32: + case IR::Opcode::BoundImageAtomicDec32: + case IR::Opcode::BoundImageAtomicAnd32: + case IR::Opcode::BoundImageAtomicOr32: + case IR::Opcode::BoundImageAtomicXor32: + case IR::Opcode::BoundImageAtomicExchange32: + case IR::Opcode::ImageAtomicIAdd32: + case IR::Opcode::ImageAtomicSMin32: + case IR::Opcode::ImageAtomicUMin32: + case IR::Opcode::ImageAtomicSMax32: + case IR::Opcode::ImageAtomicUMax32: + case IR::Opcode::ImageAtomicInc32: + case IR::Opcode::ImageAtomicDec32: + case IR::Opcode::ImageAtomicAnd32: + case IR::Opcode::ImageAtomicOr32: + case IR::Opcode::ImageAtomicXor32: + case IR::Opcode::ImageAtomicExchange32: + return true; + default: + return false; + } +} + +bool Inst::IsPseudoInstruction() const noexcept { + switch (op) { + case Opcode::GetZeroFromOp: + case Opcode::GetSignFromOp: + case Opcode::GetCarryFromOp: + case Opcode::GetOverflowFromOp: + case Opcode::GetSparseFromOp: + case Opcode::GetInBoundsFromOp: + return true; + default: + return false; + } +} + +bool Inst::AreAllArgsImmediates() const { + if (op == Opcode::Phi) { + throw LogicError("Testing for all arguments are immediates on phi instruction"); + } + return std::all_of(args.begin(), args.begin() + NumArgs(), + [](const IR::Value& value) { return value.IsImmediate(); }); +} + +Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { + if (!associated_insts) { + return nullptr; + } + 
switch (opcode) {
+    case Opcode::GetZeroFromOp:
+        CheckPseudoInstruction(associated_insts->zero_inst, Opcode::GetZeroFromOp);
+        return associated_insts->zero_inst;
+    case Opcode::GetSignFromOp:
+        CheckPseudoInstruction(associated_insts->sign_inst, Opcode::GetSignFromOp);
+        return associated_insts->sign_inst;
+    case Opcode::GetCarryFromOp:
+        CheckPseudoInstruction(associated_insts->carry_inst, Opcode::GetCarryFromOp);
+        return associated_insts->carry_inst;
+    case Opcode::GetOverflowFromOp:
+        CheckPseudoInstruction(associated_insts->overflow_inst, Opcode::GetOverflowFromOp);
+        return associated_insts->overflow_inst;
+    case Opcode::GetSparseFromOp:
+        CheckPseudoInstruction(associated_insts->sparse_inst, Opcode::GetSparseFromOp);
+        return associated_insts->sparse_inst;
+    case Opcode::GetInBoundsFromOp:
+        CheckPseudoInstruction(associated_insts->in_bounds_inst, Opcode::GetInBoundsFromOp);
+        return associated_insts->in_bounds_inst;
+    default:
+        throw InvalidArgument("{} is not a pseudo-instruction", opcode);
+    }
+}
+
+IR::Type Inst::Type() const {
+    return TypeOf(op);
+}
+
+void Inst::SetArg(size_t index, Value value) {
+    if (index >= NumArgs()) {
+        throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op);
+    }
+    const IR::Value arg{Arg(index)};
+    if (!arg.IsImmediate()) {
+        UndoUse(arg);
+    }
+    if (!value.IsImmediate()) {
+        Use(value);
+    }
+    if (op == Opcode::Phi) {
+        phi_args[index].second = value;
+    } else {
+        args[index] = value;
+    }
+}
+
+Block* Inst::PhiBlock(size_t index) const {
+    if (op != Opcode::Phi) {
+        throw LogicError("{} is not a Phi instruction", op);
+    }
+    if (index >= phi_args.size()) {
+        throw InvalidArgument("Out of bounds argument index {} in phi instruction", index);
+    }
+    return phi_args[index].first;
+}
+
+void Inst::AddPhiOperand(Block* predecessor, const Value& value) {
+    if (!value.IsImmediate()) {
+        Use(value);
+    }
+    phi_args.emplace_back(predecessor, value);
+}
+
+void Inst::Invalidate() {
+    ClearArgs();
+    ReplaceOpcode(Opcode::Void);
+}
+
+void Inst::ClearArgs() {
+    if (op == Opcode::Phi) {
+        for (auto& pair : phi_args) {
+            IR::Value& value{pair.second};
+            if (!value.IsImmediate()) {
+                UndoUse(value);
+            }
+        }
+        phi_args.clear();
+    } else {
+        for (auto& value : args) {
+            if (!value.IsImmediate()) {
+                UndoUse(value);
+            }
+        }
+        // Reset arguments to null
+        // std::memset was measured to be faster on MSVC than std::ranges::fill
+        std::memset(reinterpret_cast<char*>(&args), 0, sizeof(args));
+    }
+}
+
+void Inst::ReplaceUsesWith(Value replacement) {
+    Invalidate();
+    ReplaceOpcode(Opcode::Identity);
+    if (!replacement.IsImmediate()) {
+        Use(replacement);
+    }
+    args[0] = replacement;
+}
+
+void Inst::ReplaceOpcode(IR::Opcode opcode) {
+    if (opcode == IR::Opcode::Phi) {
+        throw LogicError("Cannot transition into Phi");
+    }
+    if (op == Opcode::Phi) {
+        // Transition out of phi arguments into non-phi
+        std::destroy_at(&phi_args);
+        std::construct_at(&args);
+    }
+    op = opcode;
+}
+
+void Inst::Use(const Value& value) {
+    Inst* const inst{value.Inst()};
+    ++inst->use_count;
+
+    std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts};
+    switch (op) {
+    case Opcode::GetZeroFromOp:
+        AllocAssociatedInsts(assoc_inst);
+        SetPseudoInstruction(assoc_inst->zero_inst, this);
+        break;
+    case Opcode::GetSignFromOp:
+        AllocAssociatedInsts(assoc_inst);
+        SetPseudoInstruction(assoc_inst->sign_inst, this);
+        break;
+    case Opcode::GetCarryFromOp:
+        AllocAssociatedInsts(assoc_inst);
+        SetPseudoInstruction(assoc_inst->carry_inst, this);
+        break;
+    case
Opcode::GetOverflowFromOp: + AllocAssociatedInsts(assoc_inst); + SetPseudoInstruction(assoc_inst->overflow_inst, this); + break; + case Opcode::GetSparseFromOp: + AllocAssociatedInsts(assoc_inst); + SetPseudoInstruction(assoc_inst->sparse_inst, this); + break; + case Opcode::GetInBoundsFromOp: + AllocAssociatedInsts(assoc_inst); + SetPseudoInstruction(assoc_inst->in_bounds_inst, this); + break; + default: + break; + } +} + +void Inst::UndoUse(const Value& value) { + Inst* const inst{value.Inst()}; + --inst->use_count; + + std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts}; + switch (op) { + case Opcode::GetZeroFromOp: + AllocAssociatedInsts(assoc_inst); + RemovePseudoInstruction(assoc_inst->zero_inst, Opcode::GetZeroFromOp); + break; + case Opcode::GetSignFromOp: + AllocAssociatedInsts(assoc_inst); + RemovePseudoInstruction(assoc_inst->sign_inst, Opcode::GetSignFromOp); + break; + case Opcode::GetCarryFromOp: + AllocAssociatedInsts(assoc_inst); + RemovePseudoInstruction(assoc_inst->carry_inst, Opcode::GetCarryFromOp); + break; + case Opcode::GetOverflowFromOp: + AllocAssociatedInsts(assoc_inst); + RemovePseudoInstruction(assoc_inst->overflow_inst, Opcode::GetOverflowFromOp); + break; + case Opcode::GetSparseFromOp: + AllocAssociatedInsts(assoc_inst); + RemovePseudoInstruction(assoc_inst->sparse_inst, Opcode::GetSparseFromOp); + break; + case Opcode::GetInBoundsFromOp: + AllocAssociatedInsts(assoc_inst); + RemovePseudoInstruction(assoc_inst->in_bounds_inst, Opcode::GetInBoundsFromOp); + break; + default: + break; + } +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h new file mode 100644 index 000000000..77cda1f8a --- /dev/null +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -0,0 +1,49 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
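+// These modifiers are small trivially-copyable structs that travel with an
+// instruction inside its 32-bit flags word (IREmitter packs them through its
+// Flags<T> helper with std::memcpy). A hypothetical example of attaching
+// rounding control to an emitted operation, given an IREmitter `ir` and F32
+// values `a` and `b`:
+//
+//     IR::FpControl control{};
+//     control.rounding = IR::FpRounding::RZ;
+//     const IR::F32 product{ir.FPMul(a, b, control)};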
+ +#pragma once + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/shader_info.h" + +namespace Shader::IR { + +enum class FmzMode : u8 { + DontCare, // Not specified for this instruction + FTZ, // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK) + FMZ, // Flush denorms to zero, x * 0 == 0 (D3D9) + None, // Denorms are not flushed, NAN is propagated (nouveau) +}; + +enum class FpRounding : u8 { + DontCare, // Not specified for this instruction + RN, // Round to nearest even, + RM, // Round towards negative infinity + RP, // Round towards positive infinity + RZ, // Round towards zero +}; + +struct FpControl { + bool no_contraction{false}; + FpRounding rounding{FpRounding::DontCare}; + FmzMode fmz_mode{FmzMode::DontCare}; +}; +static_assert(sizeof(FpControl) <= sizeof(u32)); + +union TextureInstInfo { + u32 raw; + BitField<0, 16, u32> descriptor_index; + BitField<16, 3, TextureType> type; + BitField<19, 1, u32> is_depth; + BitField<20, 1, u32> has_bias; + BitField<21, 1, u32> has_lod_clamp; + BitField<22, 1, u32> relaxed_precision; + BitField<23, 2, u32> gather_component; + BitField<25, 2, u32> num_derivates; + BitField<27, 3, ImageFormat> image_format; +}; +static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp new file mode 100644 index 000000000..24d024ad7 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/opcodes.cpp @@ -0,0 +1,15 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/frontend/ir/opcodes.h" + +namespace Shader::IR { + +std::string_view NameOf(Opcode op) { + return Detail::META_TABLE[static_cast<size_t>(op)].name; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/opcodes.h b/src/shader_recompiler/frontend/ir/opcodes.h new file mode 100644 index 000000000..9ab108292 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/opcodes.h @@ -0,0 +1,110 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <algorithm> +#include <array> +#include <string_view> + +#include <fmt/format.h> + +#include "shader_recompiler/frontend/ir/type.h" + +namespace Shader::IR { + +enum class Opcode { +#define OPCODE(name, ...) 
name,
+#include "opcodes.inc"
+#undef OPCODE
+};
+
+namespace Detail {
+struct OpcodeMeta {
+    std::string_view name;
+    Type type;
+    std::array<Type, 5> arg_types;
+};
+
+// using enum Type;
+constexpr Type Void{Type::Void};
+constexpr Type Opaque{Type::Opaque};
+constexpr Type Reg{Type::Reg};
+constexpr Type Pred{Type::Pred};
+constexpr Type Attribute{Type::Attribute};
+constexpr Type Patch{Type::Patch};
+constexpr Type U1{Type::U1};
+constexpr Type U8{Type::U8};
+constexpr Type U16{Type::U16};
+constexpr Type U32{Type::U32};
+constexpr Type U64{Type::U64};
+constexpr Type F16{Type::F16};
+constexpr Type F32{Type::F32};
+constexpr Type F64{Type::F64};
+constexpr Type U32x2{Type::U32x2};
+constexpr Type U32x3{Type::U32x3};
+constexpr Type U32x4{Type::U32x4};
+constexpr Type F16x2{Type::F16x2};
+constexpr Type F16x3{Type::F16x3};
+constexpr Type F16x4{Type::F16x4};
+constexpr Type F32x2{Type::F32x2};
+constexpr Type F32x3{Type::F32x3};
+constexpr Type F32x4{Type::F32x4};
+constexpr Type F64x2{Type::F64x2};
+constexpr Type F64x3{Type::F64x3};
+constexpr Type F64x4{Type::F64x4};
+
+constexpr OpcodeMeta META_TABLE[]{
+#define OPCODE(name_token, type_token, ...)                                                        \
+    {                                                                                              \
+        .name{#name_token},                                                                        \
+        .type = type_token,                                                                        \
+        .arg_types{__VA_ARGS__},                                                                   \
+    },
+#include "opcodes.inc"
+#undef OPCODE
+};
+constexpr size_t CalculateNumArgsOf(Opcode op) {
+    const auto& arg_types{META_TABLE[static_cast<size_t>(op)].arg_types};
+    return static_cast<size_t>(
+        std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void)));
+}
+
+constexpr u8 NUM_ARGS[]{
+#define OPCODE(name_token, type_token, ...) static_cast<u8>(CalculateNumArgsOf(Opcode::name_token)),
+#include "opcodes.inc"
+#undef OPCODE
+};
+} // namespace Detail
+
+/// Get return type of an opcode
+[[nodiscard]] inline Type TypeOf(Opcode op) noexcept {
+    return Detail::META_TABLE[static_cast<size_t>(op)].type;
+}
+
+/// Get the number of arguments an opcode accepts
+[[nodiscard]] inline size_t NumArgsOf(Opcode op) noexcept {
+    return static_cast<size_t>(Detail::NUM_ARGS[static_cast<size_t>(op)]);
+}
+
+/// Get the required type of an argument of an opcode
+[[nodiscard]] inline Type ArgTypeOf(Opcode op, size_t arg_index) noexcept {
+    return Detail::META_TABLE[static_cast<size_t>(op)].arg_types[arg_index];
+}
+
+/// Get the name of an opcode
+[[nodiscard]] std::string_view NameOf(Opcode op);
+
+} // namespace Shader::IR
+
+template <>
+struct fmt::formatter<Shader::IR::Opcode> {
+    constexpr auto parse(format_parse_context& ctx) {
+        return ctx.begin();
+    }
+    template <typename FormatContext>
+    auto format(const Shader::IR::Opcode& op, FormatContext& ctx) {
+        return format_to(ctx.out(), "{}", Shader::IR::NameOf(op));
+    }
+};
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
new file mode 100644
index 000000000..d91098c80
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -0,0 +1,550 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, arg5 type, ...
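+//
+// This file is an X-macro table: opcodes.h includes it several times with
+// different OPCODE() definitions to generate the Opcode enum, the metadata
+// table and the per-opcode argument counts. As an illustration, the entry
+//     OPCODE(IAdd32, U32, U32, U32, )
+// becomes `IAdd32,` inside `enum class Opcode` and roughly
+//     {.name{"IAdd32"}, .type = U32, .arg_types{U32, U32}},
+// inside Detail::META_TABLE, so NumArgsOf(Opcode::IAdd32) evaluates to 2.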
+OPCODE(Phi, Opaque, ) +OPCODE(Identity, Opaque, Opaque, ) +OPCODE(Void, Void, ) +OPCODE(ConditionRef, U1, U1, ) +OPCODE(Reference, Void, Opaque, ) +OPCODE(PhiMove, Void, Opaque, Opaque, ) + +// Special operations +OPCODE(Prologue, Void, ) +OPCODE(Epilogue, Void, ) +OPCODE(Join, Void, ) +OPCODE(DemoteToHelperInvocation, Void, ) +OPCODE(EmitVertex, Void, U32, ) +OPCODE(EndPrimitive, Void, U32, ) + +// Barriers +OPCODE(Barrier, Void, ) +OPCODE(WorkgroupMemoryBarrier, Void, ) +OPCODE(DeviceMemoryBarrier, Void, ) + +// Context getters/setters +OPCODE(GetRegister, U32, Reg, ) +OPCODE(SetRegister, Void, Reg, U32, ) +OPCODE(GetPred, U1, Pred, ) +OPCODE(SetPred, Void, Pred, U1, ) +OPCODE(GetGotoVariable, U1, U32, ) +OPCODE(SetGotoVariable, Void, U32, U1, ) +OPCODE(GetIndirectBranchVariable, U32, ) +OPCODE(SetIndirectBranchVariable, Void, U32, ) +OPCODE(GetCbufU8, U32, U32, U32, ) +OPCODE(GetCbufS8, U32, U32, U32, ) +OPCODE(GetCbufU16, U32, U32, U32, ) +OPCODE(GetCbufS16, U32, U32, U32, ) +OPCODE(GetCbufU32, U32, U32, U32, ) +OPCODE(GetCbufF32, F32, U32, U32, ) +OPCODE(GetCbufU32x2, U32x2, U32, U32, ) +OPCODE(GetAttribute, F32, Attribute, U32, ) +OPCODE(SetAttribute, Void, Attribute, F32, U32, ) +OPCODE(GetAttributeIndexed, F32, U32, U32, ) +OPCODE(SetAttributeIndexed, Void, U32, F32, U32, ) +OPCODE(GetPatch, F32, Patch, ) +OPCODE(SetPatch, Void, Patch, F32, ) +OPCODE(SetFragColor, Void, U32, U32, F32, ) +OPCODE(SetSampleMask, Void, U32, ) +OPCODE(SetFragDepth, Void, F32, ) +OPCODE(GetZFlag, U1, Void, ) +OPCODE(GetSFlag, U1, Void, ) +OPCODE(GetCFlag, U1, Void, ) +OPCODE(GetOFlag, U1, Void, ) +OPCODE(SetZFlag, Void, U1, ) +OPCODE(SetSFlag, Void, U1, ) +OPCODE(SetCFlag, Void, U1, ) +OPCODE(SetOFlag, Void, U1, ) +OPCODE(WorkgroupId, U32x3, ) +OPCODE(LocalInvocationId, U32x3, ) +OPCODE(InvocationId, U32, ) +OPCODE(SampleId, U32, ) +OPCODE(IsHelperInvocation, U1, ) +OPCODE(YDirection, F32, ) + +// Undefined +OPCODE(UndefU1, U1, ) +OPCODE(UndefU8, U8, ) +OPCODE(UndefU16, U16, ) +OPCODE(UndefU32, U32, ) +OPCODE(UndefU64, U64, ) + +// Memory operations +OPCODE(LoadGlobalU8, U32, Opaque, ) +OPCODE(LoadGlobalS8, U32, Opaque, ) +OPCODE(LoadGlobalU16, U32, Opaque, ) +OPCODE(LoadGlobalS16, U32, Opaque, ) +OPCODE(LoadGlobal32, U32, Opaque, ) +OPCODE(LoadGlobal64, U32x2, Opaque, ) +OPCODE(LoadGlobal128, U32x4, Opaque, ) +OPCODE(WriteGlobalU8, Void, Opaque, U32, ) +OPCODE(WriteGlobalS8, Void, Opaque, U32, ) +OPCODE(WriteGlobalU16, Void, Opaque, U32, ) +OPCODE(WriteGlobalS16, Void, Opaque, U32, ) +OPCODE(WriteGlobal32, Void, Opaque, U32, ) +OPCODE(WriteGlobal64, Void, Opaque, U32x2, ) +OPCODE(WriteGlobal128, Void, Opaque, U32x4, ) + +// Storage buffer operations +OPCODE(LoadStorageU8, U32, U32, U32, ) +OPCODE(LoadStorageS8, U32, U32, U32, ) +OPCODE(LoadStorageU16, U32, U32, U32, ) +OPCODE(LoadStorageS16, U32, U32, U32, ) +OPCODE(LoadStorage32, U32, U32, U32, ) +OPCODE(LoadStorage64, U32x2, U32, U32, ) +OPCODE(LoadStorage128, U32x4, U32, U32, ) +OPCODE(WriteStorageU8, Void, U32, U32, U32, ) +OPCODE(WriteStorageS8, Void, U32, U32, U32, ) +OPCODE(WriteStorageU16, Void, U32, U32, U32, ) +OPCODE(WriteStorageS16, Void, U32, U32, U32, ) +OPCODE(WriteStorage32, Void, U32, U32, U32, ) +OPCODE(WriteStorage64, Void, U32, U32, U32x2, ) +OPCODE(WriteStorage128, Void, U32, U32, U32x4, ) + +// Local memory operations +OPCODE(LoadLocal, U32, U32, ) +OPCODE(WriteLocal, Void, U32, U32, ) + +// Shared memory operations +OPCODE(LoadSharedU8, U32, U32, ) +OPCODE(LoadSharedS8, U32, U32, ) +OPCODE(LoadSharedU16, U32, U32, ) 
+OPCODE(LoadSharedS16, U32, U32, ) +OPCODE(LoadSharedU32, U32, U32, ) +OPCODE(LoadSharedU64, U32x2, U32, ) +OPCODE(LoadSharedU128, U32x4, U32, ) +OPCODE(WriteSharedU8, Void, U32, U32, ) +OPCODE(WriteSharedU16, Void, U32, U32, ) +OPCODE(WriteSharedU32, Void, U32, U32, ) +OPCODE(WriteSharedU64, Void, U32, U32x2, ) +OPCODE(WriteSharedU128, Void, U32, U32x4, ) + +// Vector utility +OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) +OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) +OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, ) +OPCODE(CompositeExtractU32x2, U32, U32x2, U32, ) +OPCODE(CompositeExtractU32x3, U32, U32x3, U32, ) +OPCODE(CompositeExtractU32x4, U32, U32x4, U32, ) +OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, ) +OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, ) +OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, ) +OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) +OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) +OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) +OPCODE(CompositeExtractF16x2, F16, F16x2, U32, ) +OPCODE(CompositeExtractF16x3, F16, F16x3, U32, ) +OPCODE(CompositeExtractF16x4, F16, F16x4, U32, ) +OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, ) +OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, ) +OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, ) +OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) +OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) +OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) +OPCODE(CompositeExtractF32x2, F32, F32x2, U32, ) +OPCODE(CompositeExtractF32x3, F32, F32x3, U32, ) +OPCODE(CompositeExtractF32x4, F32, F32x4, U32, ) +OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, ) +OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, ) +OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, ) +OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) +OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) +OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) +OPCODE(CompositeExtractF64x2, F64, F64x2, U32, ) +OPCODE(CompositeExtractF64x3, F64, F64x3, U32, ) +OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) +OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) +OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) +OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) + +// Select operations +OPCODE(SelectU1, U1, U1, U1, U1, ) +OPCODE(SelectU8, U8, U1, U8, U8, ) +OPCODE(SelectU16, U16, U1, U16, U16, ) +OPCODE(SelectU32, U32, U1, U32, U32, ) +OPCODE(SelectU64, U64, U1, U64, U64, ) +OPCODE(SelectF16, F16, U1, F16, F16, ) +OPCODE(SelectF32, F32, U1, F32, F32, ) +OPCODE(SelectF64, F64, U1, F64, F64, ) + +// Bitwise conversions +OPCODE(BitCastU16F16, U16, F16, ) +OPCODE(BitCastU32F32, U32, F32, ) +OPCODE(BitCastU64F64, U64, F64, ) +OPCODE(BitCastF16U16, F16, U16, ) +OPCODE(BitCastF32U32, F32, U32, ) +OPCODE(BitCastF64U64, F64, U64, ) +OPCODE(PackUint2x32, U64, U32x2, ) +OPCODE(UnpackUint2x32, U32x2, U64, ) +OPCODE(PackFloat2x16, U32, F16x2, ) +OPCODE(UnpackFloat2x16, F16x2, U32, ) +OPCODE(PackHalf2x16, U32, F32x2, ) +OPCODE(UnpackHalf2x16, F32x2, U32, ) +OPCODE(PackDouble2x32, F64, U32x2, ) +OPCODE(UnpackDouble2x32, U32x2, F64, ) + +// Pseudo-operation, handled specially at final emit +OPCODE(GetZeroFromOp, U1, Opaque, ) +OPCODE(GetSignFromOp, U1, Opaque, ) +OPCODE(GetCarryFromOp, U1, Opaque, ) +OPCODE(GetOverflowFromOp, U1, Opaque, ) +OPCODE(GetSparseFromOp, U1, Opaque, ) +OPCODE(GetInBoundsFromOp, U1, Opaque, ) + +// Floating-point operations +OPCODE(FPAbs16, F16, 
F16, ) +OPCODE(FPAbs32, F32, F32, ) +OPCODE(FPAbs64, F64, F64, ) +OPCODE(FPAdd16, F16, F16, F16, ) +OPCODE(FPAdd32, F32, F32, F32, ) +OPCODE(FPAdd64, F64, F64, F64, ) +OPCODE(FPFma16, F16, F16, F16, F16, ) +OPCODE(FPFma32, F32, F32, F32, F32, ) +OPCODE(FPFma64, F64, F64, F64, F64, ) +OPCODE(FPMax32, F32, F32, F32, ) +OPCODE(FPMax64, F64, F64, F64, ) +OPCODE(FPMin32, F32, F32, F32, ) +OPCODE(FPMin64, F64, F64, F64, ) +OPCODE(FPMul16, F16, F16, F16, ) +OPCODE(FPMul32, F32, F32, F32, ) +OPCODE(FPMul64, F64, F64, F64, ) +OPCODE(FPNeg16, F16, F16, ) +OPCODE(FPNeg32, F32, F32, ) +OPCODE(FPNeg64, F64, F64, ) +OPCODE(FPRecip32, F32, F32, ) +OPCODE(FPRecip64, F64, F64, ) +OPCODE(FPRecipSqrt32, F32, F32, ) +OPCODE(FPRecipSqrt64, F64, F64, ) +OPCODE(FPSqrt, F32, F32, ) +OPCODE(FPSin, F32, F32, ) +OPCODE(FPExp2, F32, F32, ) +OPCODE(FPCos, F32, F32, ) +OPCODE(FPLog2, F32, F32, ) +OPCODE(FPSaturate16, F16, F16, ) +OPCODE(FPSaturate32, F32, F32, ) +OPCODE(FPSaturate64, F64, F64, ) +OPCODE(FPClamp16, F16, F16, F16, F16, ) +OPCODE(FPClamp32, F32, F32, F32, F32, ) +OPCODE(FPClamp64, F64, F64, F64, F64, ) +OPCODE(FPRoundEven16, F16, F16, ) +OPCODE(FPRoundEven32, F32, F32, ) +OPCODE(FPRoundEven64, F64, F64, ) +OPCODE(FPFloor16, F16, F16, ) +OPCODE(FPFloor32, F32, F32, ) +OPCODE(FPFloor64, F64, F64, ) +OPCODE(FPCeil16, F16, F16, ) +OPCODE(FPCeil32, F32, F32, ) +OPCODE(FPCeil64, F64, F64, ) +OPCODE(FPTrunc16, F16, F16, ) +OPCODE(FPTrunc32, F32, F32, ) +OPCODE(FPTrunc64, F64, F64, ) + +OPCODE(FPOrdEqual16, U1, F16, F16, ) +OPCODE(FPOrdEqual32, U1, F32, F32, ) +OPCODE(FPOrdEqual64, U1, F64, F64, ) +OPCODE(FPUnordEqual16, U1, F16, F16, ) +OPCODE(FPUnordEqual32, U1, F32, F32, ) +OPCODE(FPUnordEqual64, U1, F64, F64, ) +OPCODE(FPOrdNotEqual16, U1, F16, F16, ) +OPCODE(FPOrdNotEqual32, U1, F32, F32, ) +OPCODE(FPOrdNotEqual64, U1, F64, F64, ) +OPCODE(FPUnordNotEqual16, U1, F16, F16, ) +OPCODE(FPUnordNotEqual32, U1, F32, F32, ) +OPCODE(FPUnordNotEqual64, U1, F64, F64, ) +OPCODE(FPOrdLessThan16, U1, F16, F16, ) +OPCODE(FPOrdLessThan32, U1, F32, F32, ) +OPCODE(FPOrdLessThan64, U1, F64, F64, ) +OPCODE(FPUnordLessThan16, U1, F16, F16, ) +OPCODE(FPUnordLessThan32, U1, F32, F32, ) +OPCODE(FPUnordLessThan64, U1, F64, F64, ) +OPCODE(FPOrdGreaterThan16, U1, F16, F16, ) +OPCODE(FPOrdGreaterThan32, U1, F32, F32, ) +OPCODE(FPOrdGreaterThan64, U1, F64, F64, ) +OPCODE(FPUnordGreaterThan16, U1, F16, F16, ) +OPCODE(FPUnordGreaterThan32, U1, F32, F32, ) +OPCODE(FPUnordGreaterThan64, U1, F64, F64, ) +OPCODE(FPOrdLessThanEqual16, U1, F16, F16, ) +OPCODE(FPOrdLessThanEqual32, U1, F32, F32, ) +OPCODE(FPOrdLessThanEqual64, U1, F64, F64, ) +OPCODE(FPUnordLessThanEqual16, U1, F16, F16, ) +OPCODE(FPUnordLessThanEqual32, U1, F32, F32, ) +OPCODE(FPUnordLessThanEqual64, U1, F64, F64, ) +OPCODE(FPOrdGreaterThanEqual16, U1, F16, F16, ) +OPCODE(FPOrdGreaterThanEqual32, U1, F32, F32, ) +OPCODE(FPOrdGreaterThanEqual64, U1, F64, F64, ) +OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, ) +OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, ) +OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, ) +OPCODE(FPIsNan16, U1, F16, ) +OPCODE(FPIsNan32, U1, F32, ) +OPCODE(FPIsNan64, U1, F64, ) + +// Integer operations +OPCODE(IAdd32, U32, U32, U32, ) +OPCODE(IAdd64, U64, U64, U64, ) +OPCODE(ISub32, U32, U32, U32, ) +OPCODE(ISub64, U64, U64, U64, ) +OPCODE(IMul32, U32, U32, U32, ) +OPCODE(INeg32, U32, U32, ) +OPCODE(INeg64, U64, U64, ) +OPCODE(IAbs32, U32, U32, ) +OPCODE(ShiftLeftLogical32, U32, U32, U32, ) +OPCODE(ShiftLeftLogical64, U64, U64, U32, ) 
+OPCODE(ShiftRightLogical32, U32, U32, U32, ) +OPCODE(ShiftRightLogical64, U64, U64, U32, ) +OPCODE(ShiftRightArithmetic32, U32, U32, U32, ) +OPCODE(ShiftRightArithmetic64, U64, U64, U32, ) +OPCODE(BitwiseAnd32, U32, U32, U32, ) +OPCODE(BitwiseOr32, U32, U32, U32, ) +OPCODE(BitwiseXor32, U32, U32, U32, ) +OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, ) +OPCODE(BitFieldSExtract, U32, U32, U32, U32, ) +OPCODE(BitFieldUExtract, U32, U32, U32, U32, ) +OPCODE(BitReverse32, U32, U32, ) +OPCODE(BitCount32, U32, U32, ) +OPCODE(BitwiseNot32, U32, U32, ) + +OPCODE(FindSMsb32, U32, U32, ) +OPCODE(FindUMsb32, U32, U32, ) +OPCODE(SMin32, U32, U32, U32, ) +OPCODE(UMin32, U32, U32, U32, ) +OPCODE(SMax32, U32, U32, U32, ) +OPCODE(UMax32, U32, U32, U32, ) +OPCODE(SClamp32, U32, U32, U32, U32, ) +OPCODE(UClamp32, U32, U32, U32, U32, ) +OPCODE(SLessThan, U1, U32, U32, ) +OPCODE(ULessThan, U1, U32, U32, ) +OPCODE(IEqual, U1, U32, U32, ) +OPCODE(SLessThanEqual, U1, U32, U32, ) +OPCODE(ULessThanEqual, U1, U32, U32, ) +OPCODE(SGreaterThan, U1, U32, U32, ) +OPCODE(UGreaterThan, U1, U32, U32, ) +OPCODE(INotEqual, U1, U32, U32, ) +OPCODE(SGreaterThanEqual, U1, U32, U32, ) +OPCODE(UGreaterThanEqual, U1, U32, U32, ) + +// Atomic operations +OPCODE(SharedAtomicIAdd32, U32, U32, U32, ) +OPCODE(SharedAtomicSMin32, U32, U32, U32, ) +OPCODE(SharedAtomicUMin32, U32, U32, U32, ) +OPCODE(SharedAtomicSMax32, U32, U32, U32, ) +OPCODE(SharedAtomicUMax32, U32, U32, U32, ) +OPCODE(SharedAtomicInc32, U32, U32, U32, ) +OPCODE(SharedAtomicDec32, U32, U32, U32, ) +OPCODE(SharedAtomicAnd32, U32, U32, U32, ) +OPCODE(SharedAtomicOr32, U32, U32, U32, ) +OPCODE(SharedAtomicXor32, U32, U32, U32, ) +OPCODE(SharedAtomicExchange32, U32, U32, U32, ) +OPCODE(SharedAtomicExchange64, U64, U32, U64, ) + +OPCODE(GlobalAtomicIAdd32, U32, U64, U32, ) +OPCODE(GlobalAtomicSMin32, U32, U64, U32, ) +OPCODE(GlobalAtomicUMin32, U32, U64, U32, ) +OPCODE(GlobalAtomicSMax32, U32, U64, U32, ) +OPCODE(GlobalAtomicUMax32, U32, U64, U32, ) +OPCODE(GlobalAtomicInc32, U32, U64, U32, ) +OPCODE(GlobalAtomicDec32, U32, U64, U32, ) +OPCODE(GlobalAtomicAnd32, U32, U64, U32, ) +OPCODE(GlobalAtomicOr32, U32, U64, U32, ) +OPCODE(GlobalAtomicXor32, U32, U64, U32, ) +OPCODE(GlobalAtomicExchange32, U32, U64, U32, ) +OPCODE(GlobalAtomicIAdd64, U64, U64, U64, ) +OPCODE(GlobalAtomicSMin64, U64, U64, U64, ) +OPCODE(GlobalAtomicUMin64, U64, U64, U64, ) +OPCODE(GlobalAtomicSMax64, U64, U64, U64, ) +OPCODE(GlobalAtomicUMax64, U64, U64, U64, ) +OPCODE(GlobalAtomicAnd64, U64, U64, U64, ) +OPCODE(GlobalAtomicOr64, U64, U64, U64, ) +OPCODE(GlobalAtomicXor64, U64, U64, U64, ) +OPCODE(GlobalAtomicExchange64, U64, U64, U64, ) +OPCODE(GlobalAtomicAddF32, F32, U64, F32, ) +OPCODE(GlobalAtomicAddF16x2, U32, U64, F16x2, ) +OPCODE(GlobalAtomicAddF32x2, U32, U64, F32x2, ) +OPCODE(GlobalAtomicMinF16x2, U32, U64, F16x2, ) +OPCODE(GlobalAtomicMinF32x2, U32, U64, F32x2, ) +OPCODE(GlobalAtomicMaxF16x2, U32, U64, F16x2, ) +OPCODE(GlobalAtomicMaxF32x2, U32, U64, F32x2, ) + +OPCODE(StorageAtomicIAdd32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicSMin32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicUMin32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicSMax32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicUMax32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicInc32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicDec32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicAnd32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicOr32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicXor32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicExchange32, 
U32, U32, U32, U32, ) +OPCODE(StorageAtomicIAdd64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicSMin64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicUMin64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicSMax64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicUMax64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicAnd64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicOr64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicXor64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicExchange64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicAddF32, F32, U32, U32, F32, ) +OPCODE(StorageAtomicAddF16x2, U32, U32, U32, F16x2, ) +OPCODE(StorageAtomicAddF32x2, U32, U32, U32, F32x2, ) +OPCODE(StorageAtomicMinF16x2, U32, U32, U32, F16x2, ) +OPCODE(StorageAtomicMinF32x2, U32, U32, U32, F32x2, ) +OPCODE(StorageAtomicMaxF16x2, U32, U32, U32, F16x2, ) +OPCODE(StorageAtomicMaxF32x2, U32, U32, U32, F32x2, ) + +// Logical operations +OPCODE(LogicalOr, U1, U1, U1, ) +OPCODE(LogicalAnd, U1, U1, U1, ) +OPCODE(LogicalXor, U1, U1, U1, ) +OPCODE(LogicalNot, U1, U1, ) + +// Conversion operations +OPCODE(ConvertS16F16, U32, F16, ) +OPCODE(ConvertS16F32, U32, F32, ) +OPCODE(ConvertS16F64, U32, F64, ) +OPCODE(ConvertS32F16, U32, F16, ) +OPCODE(ConvertS32F32, U32, F32, ) +OPCODE(ConvertS32F64, U32, F64, ) +OPCODE(ConvertS64F16, U64, F16, ) +OPCODE(ConvertS64F32, U64, F32, ) +OPCODE(ConvertS64F64, U64, F64, ) +OPCODE(ConvertU16F16, U32, F16, ) +OPCODE(ConvertU16F32, U32, F32, ) +OPCODE(ConvertU16F64, U32, F64, ) +OPCODE(ConvertU32F16, U32, F16, ) +OPCODE(ConvertU32F32, U32, F32, ) +OPCODE(ConvertU32F64, U32, F64, ) +OPCODE(ConvertU64F16, U64, F16, ) +OPCODE(ConvertU64F32, U64, F32, ) +OPCODE(ConvertU64F64, U64, F64, ) +OPCODE(ConvertU64U32, U64, U32, ) +OPCODE(ConvertU32U64, U32, U64, ) +OPCODE(ConvertF16F32, F16, F32, ) +OPCODE(ConvertF32F16, F32, F16, ) +OPCODE(ConvertF32F64, F32, F64, ) +OPCODE(ConvertF64F32, F64, F32, ) +OPCODE(ConvertF16S8, F16, U32, ) +OPCODE(ConvertF16S16, F16, U32, ) +OPCODE(ConvertF16S32, F16, U32, ) +OPCODE(ConvertF16S64, F16, U64, ) +OPCODE(ConvertF16U8, F16, U32, ) +OPCODE(ConvertF16U16, F16, U32, ) +OPCODE(ConvertF16U32, F16, U32, ) +OPCODE(ConvertF16U64, F16, U64, ) +OPCODE(ConvertF32S8, F32, U32, ) +OPCODE(ConvertF32S16, F32, U32, ) +OPCODE(ConvertF32S32, F32, U32, ) +OPCODE(ConvertF32S64, F32, U64, ) +OPCODE(ConvertF32U8, F32, U32, ) +OPCODE(ConvertF32U16, F32, U32, ) +OPCODE(ConvertF32U32, F32, U32, ) +OPCODE(ConvertF32U64, F32, U64, ) +OPCODE(ConvertF64S8, F64, U32, ) +OPCODE(ConvertF64S16, F64, U32, ) +OPCODE(ConvertF64S32, F64, U32, ) +OPCODE(ConvertF64S64, F64, U64, ) +OPCODE(ConvertF64U8, F64, U32, ) +OPCODE(ConvertF64U16, F64, U32, ) +OPCODE(ConvertF64U32, F64, U32, ) +OPCODE(ConvertF64U64, F64, U64, ) + +// Image operations +OPCODE(BindlessImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(BindlessImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(BindlessImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) +OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) +OPCODE(BindlessImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(BindlessImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) +OPCODE(BindlessImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, ) +OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, ) +OPCODE(BindlessImageQueryLod, F32x4, U32, Opaque, ) +OPCODE(BindlessImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(BindlessImageRead, U32x4, U32, Opaque, ) +OPCODE(BindlessImageWrite, Void, 
U32, Opaque, U32x4, ) + +OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(BoundImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) +OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) +OPCODE(BoundImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(BoundImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) +OPCODE(BoundImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, ) +OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, ) +OPCODE(BoundImageQueryLod, F32x4, U32, Opaque, ) +OPCODE(BoundImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(BoundImageRead, U32x4, U32, Opaque, ) +OPCODE(BoundImageWrite, Void, U32, Opaque, U32x4, ) + +OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, ) +OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, ) +OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, Opaque, F32, ) +OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, ) +OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, ) +OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, ) +OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageRead, U32x4, Opaque, Opaque, ) +OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, ) + +// Atomic Image operations + +OPCODE(BindlessImageAtomicIAdd32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicSMin32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicUMin32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicSMax32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicUMax32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicInc32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicDec32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicAnd32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicOr32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicXor32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicExchange32, U32, U32, Opaque, U32, ) + +OPCODE(BoundImageAtomicIAdd32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicSMin32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicUMin32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicSMax32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicUMax32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicInc32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicDec32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicAnd32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicOr32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicXor32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicExchange32, U32, U32, Opaque, U32, ) + +OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicSMin32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicOr32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, ) + +// Warp operations +OPCODE(LaneId, 
U32, ) +OPCODE(VoteAll, U1, U1, ) +OPCODE(VoteAny, U1, U1, ) +OPCODE(VoteEqual, U1, U1, ) +OPCODE(SubgroupBallot, U32, U1, ) +OPCODE(SubgroupEqMask, U32, ) +OPCODE(SubgroupLtMask, U32, ) +OPCODE(SubgroupLeMask, U32, ) +OPCODE(SubgroupGtMask, U32, ) +OPCODE(SubgroupGeMask, U32, ) +OPCODE(ShuffleIndex, U32, U32, U32, U32, U32, ) +OPCODE(ShuffleUp, U32, U32, U32, U32, U32, ) +OPCODE(ShuffleDown, U32, U32, U32, U32, U32, ) +OPCODE(ShuffleButterfly, U32, U32, U32, U32, U32, ) +OPCODE(FSwizzleAdd, F32, F32, F32, U32, ) +OPCODE(DPdxFine, F32, F32, ) +OPCODE(DPdyFine, F32, F32, ) +OPCODE(DPdxCoarse, F32, F32, ) +OPCODE(DPdyCoarse, F32, F32, ) diff --git a/src/shader_recompiler/frontend/ir/patch.cpp b/src/shader_recompiler/frontend/ir/patch.cpp new file mode 100644 index 000000000..4c956a970 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/patch.cpp @@ -0,0 +1,28 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/patch.h" + +namespace Shader::IR { + +bool IsGeneric(Patch patch) noexcept { + return patch >= Patch::Component0 && patch <= Patch::Component119; +} + +u32 GenericPatchIndex(Patch patch) { + if (!IsGeneric(patch)) { + throw InvalidArgument("Patch {} is not generic", patch); + } + return (static_cast<u32>(patch) - static_cast<u32>(Patch::Component0)) / 4; +} + +u32 GenericPatchElement(Patch patch) { + if (!IsGeneric(patch)) { + throw InvalidArgument("Patch {} is not generic", patch); + } + return (static_cast<u32>(patch) - static_cast<u32>(Patch::Component0)) % 4; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/patch.h b/src/shader_recompiler/frontend/ir/patch.h new file mode 100644 index 000000000..6d66ff0d6 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/patch.h @@ -0,0 +1,149 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
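+// Patch attribute space: six tessellation levels, two padding slots, then 120
+// generic components addressed as 30 four-component patch attributes, for 128
+// entries in total (hence the static_assert after the enum). For example,
+// Patch::Component5 yields GenericPatchIndex == 1 and GenericPatchElement == 1.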
+ +#pragma once + +#include "common/common_types.h" + +namespace Shader::IR { + +enum class Patch : u64 { + TessellationLodLeft, + TessellationLodTop, + TessellationLodRight, + TessellationLodBottom, + TessellationLodInteriorU, + TessellationLodInteriorV, + ComponentPadding0, + ComponentPadding1, + Component0, + Component1, + Component2, + Component3, + Component4, + Component5, + Component6, + Component7, + Component8, + Component9, + Component10, + Component11, + Component12, + Component13, + Component14, + Component15, + Component16, + Component17, + Component18, + Component19, + Component20, + Component21, + Component22, + Component23, + Component24, + Component25, + Component26, + Component27, + Component28, + Component29, + Component30, + Component31, + Component32, + Component33, + Component34, + Component35, + Component36, + Component37, + Component38, + Component39, + Component40, + Component41, + Component42, + Component43, + Component44, + Component45, + Component46, + Component47, + Component48, + Component49, + Component50, + Component51, + Component52, + Component53, + Component54, + Component55, + Component56, + Component57, + Component58, + Component59, + Component60, + Component61, + Component62, + Component63, + Component64, + Component65, + Component66, + Component67, + Component68, + Component69, + Component70, + Component71, + Component72, + Component73, + Component74, + Component75, + Component76, + Component77, + Component78, + Component79, + Component80, + Component81, + Component82, + Component83, + Component84, + Component85, + Component86, + Component87, + Component88, + Component89, + Component90, + Component91, + Component92, + Component93, + Component94, + Component95, + Component96, + Component97, + Component98, + Component99, + Component100, + Component101, + Component102, + Component103, + Component104, + Component105, + Component106, + Component107, + Component108, + Component109, + Component110, + Component111, + Component112, + Component113, + Component114, + Component115, + Component116, + Component117, + Component118, + Component119, +}; +static_assert(static_cast<u64>(Patch::Component119) == 127); + +[[nodiscard]] bool IsGeneric(Patch patch) noexcept; + +[[nodiscard]] u32 GenericPatchIndex(Patch patch); + +[[nodiscard]] u32 GenericPatchElement(Patch patch); + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/post_order.cpp b/src/shader_recompiler/frontend/ir/post_order.cpp new file mode 100644 index 000000000..16bc44101 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/post_order.cpp @@ -0,0 +1,46 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <algorithm> + +#include <boost/container/flat_set.hpp> +#include <boost/container/small_vector.hpp> + +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/post_order.h" + +namespace Shader::IR { + +BlockList PostOrder(const AbstractSyntaxNode& root) { + boost::container::small_vector<Block*, 16> block_stack; + boost::container::flat_set<Block*> visited; + BlockList post_order_blocks; + + if (root.type != AbstractSyntaxNode::Type::Block) { + throw LogicError("First node in abstract syntax list root is not a block"); + } + Block* const first_block{root.data.block}; + visited.insert(first_block); + block_stack.push_back(first_block); + + while (!block_stack.empty()) { + Block* const block{block_stack.back()}; + const auto visit{[&](Block* branch) { + if (!visited.insert(branch).second) { + return false; + } + // Calling push_back twice is faster than insert on MSVC + block_stack.push_back(block); + block_stack.push_back(branch); + return true; + }}; + block_stack.pop_back(); + if (std::ranges::none_of(block->ImmSuccessors(), visit)) { + post_order_blocks.push_back(block); + } + } + return post_order_blocks; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/post_order.h b/src/shader_recompiler/frontend/ir/post_order.h new file mode 100644 index 000000000..07bfbadc3 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/post_order.h @@ -0,0 +1,14 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "shader_recompiler/frontend/ir/abstract_syntax_list.h" +#include "shader_recompiler/frontend/ir/basic_block.h" + +namespace Shader::IR { + +BlockList PostOrder(const AbstractSyntaxNode& root); + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/pred.h b/src/shader_recompiler/frontend/ir/pred.h new file mode 100644 index 000000000..4e7f32423 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/pred.h @@ -0,0 +1,44 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <fmt/format.h> + +namespace Shader::IR { + +enum class Pred : u64 { + P0, + P1, + P2, + P3, + P4, + P5, + P6, + PT, +}; + +constexpr size_t NUM_USER_PREDS = 7; +constexpr size_t NUM_PREDS = 8; + +[[nodiscard]] constexpr size_t PredIndex(Pred pred) noexcept { + return static_cast<size_t>(pred); +} + +} // namespace Shader::IR + +template <> +struct fmt::formatter<Shader::IR::Pred> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::IR::Pred& pred, FormatContext& ctx) { + if (pred == Shader::IR::Pred::PT) { + return fmt::format_to(ctx.out(), "PT"); + } else { + return fmt::format_to(ctx.out(), "P{}", static_cast<int>(pred)); + } + } +}; diff --git a/src/shader_recompiler/frontend/ir/program.cpp b/src/shader_recompiler/frontend/ir/program.cpp new file mode 100644 index 000000000..3fc06f855 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/program.cpp @@ -0,0 +1,32 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
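PostOrder() above is an iterative depth-first traversal: it pops a block and, if any immediate successor is still unvisited, re-pushes the block together with that successor; a block is emitted only once every successor has been seen. A minimal sketch of the same idea on a plain integer adjacency list (hypothetical graph, not the IR types):

```cpp
// Sketch of the iterative post-order trick: re-push the parent before
// descending into an unvisited successor, emit a node only when none_of
// its successors is new.
#include <algorithm>
#include <cstdio>
#include <set>
#include <vector>

std::vector<int> PostOrder(const std::vector<std::vector<int>>& succ, int root) {
    std::vector<int> stack{root};
    std::set<int> visited{root};
    std::vector<int> post_order;
    while (!stack.empty()) {
        const int node = stack.back();
        stack.pop_back();
        const auto visit = [&](int next) {
            if (!visited.insert(next).second) {
                return false;
            }
            stack.push_back(node); // revisit the parent later
            stack.push_back(next); // descend into this successor first
            return true;
        };
        if (std::none_of(succ[node].begin(), succ[node].end(), visit)) {
            post_order.push_back(node);
        }
    }
    return post_order;
}

int main() {
    // 0 -> {1, 2}, 1 -> {3}, 2 -> {3}, 3 -> {}
    const std::vector<std::vector<int>> succ{{1, 2}, {3}, {3}, {}};
    for (const int n : PostOrder(succ, 0)) {
        std::printf("%d ", n); // prints: 3 1 2 0
    }
}
```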
+ +#include <map> +#include <string> + +#include <fmt/format.h> + +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::IR { + +std::string DumpProgram(const Program& program) { + size_t index{0}; + std::map<const IR::Inst*, size_t> inst_to_index; + std::map<const IR::Block*, size_t> block_to_index; + + for (const IR::Block* const block : program.blocks) { + block_to_index.emplace(block, index); + ++index; + } + std::string ret; + for (const auto& block : program.blocks) { + ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n'; + } + return ret; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h new file mode 100644 index 000000000..ebcaa8bc2 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/program.h @@ -0,0 +1,35 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <string> + +#include "shader_recompiler/frontend/ir/abstract_syntax_list.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/program_header.h" +#include "shader_recompiler/shader_info.h" +#include "shader_recompiler/stage.h" + +namespace Shader::IR { + +struct Program { + AbstractSyntaxList syntax_list; + BlockList blocks; + BlockList post_order_blocks; + Info info; + Stage stage{}; + std::array<u32, 3> workgroup_size{}; + OutputTopology output_topology{}; + u32 output_vertices{}; + u32 invocations{}; + u32 local_memory_size{}; + u32 shared_memory_size{}; + bool is_geometry_passthrough{}; +}; + +[[nodiscard]] std::string DumpProgram(const Program& program); + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h new file mode 100644 index 000000000..a4b635792 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/reg.h @@ -0,0 +1,332 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
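Program above carries both blocks and post_order_blocks. A general compiler-construction note (not something stated in this change): walking a post order in reverse yields a reverse post order, which visits a block before its non-back-edge successors and therefore suits forward passes. A trivial sketch, reusing the post order produced in the previous example:

```cpp
// Sketch: reverse iteration over a post-order list gives a reverse post order.
#include <cstdio>
#include <ranges>
#include <vector>

int main() {
    const std::vector<int> post_order{3, 1, 2, 0}; // from the sketch above
    for (const int block : post_order | std::views::reverse) {
        std::printf("%d ", block); // prints: 0 2 1 3, a valid reverse post order
    }
}
```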
+ +#pragma once + +#include <fmt/format.h> + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" + +namespace Shader::IR { + +enum class Reg : u64 { + R0, + R1, + R2, + R3, + R4, + R5, + R6, + R7, + R8, + R9, + R10, + R11, + R12, + R13, + R14, + R15, + R16, + R17, + R18, + R19, + R20, + R21, + R22, + R23, + R24, + R25, + R26, + R27, + R28, + R29, + R30, + R31, + R32, + R33, + R34, + R35, + R36, + R37, + R38, + R39, + R40, + R41, + R42, + R43, + R44, + R45, + R46, + R47, + R48, + R49, + R50, + R51, + R52, + R53, + R54, + R55, + R56, + R57, + R58, + R59, + R60, + R61, + R62, + R63, + R64, + R65, + R66, + R67, + R68, + R69, + R70, + R71, + R72, + R73, + R74, + R75, + R76, + R77, + R78, + R79, + R80, + R81, + R82, + R83, + R84, + R85, + R86, + R87, + R88, + R89, + R90, + R91, + R92, + R93, + R94, + R95, + R96, + R97, + R98, + R99, + R100, + R101, + R102, + R103, + R104, + R105, + R106, + R107, + R108, + R109, + R110, + R111, + R112, + R113, + R114, + R115, + R116, + R117, + R118, + R119, + R120, + R121, + R122, + R123, + R124, + R125, + R126, + R127, + R128, + R129, + R130, + R131, + R132, + R133, + R134, + R135, + R136, + R137, + R138, + R139, + R140, + R141, + R142, + R143, + R144, + R145, + R146, + R147, + R148, + R149, + R150, + R151, + R152, + R153, + R154, + R155, + R156, + R157, + R158, + R159, + R160, + R161, + R162, + R163, + R164, + R165, + R166, + R167, + R168, + R169, + R170, + R171, + R172, + R173, + R174, + R175, + R176, + R177, + R178, + R179, + R180, + R181, + R182, + R183, + R184, + R185, + R186, + R187, + R188, + R189, + R190, + R191, + R192, + R193, + R194, + R195, + R196, + R197, + R198, + R199, + R200, + R201, + R202, + R203, + R204, + R205, + R206, + R207, + R208, + R209, + R210, + R211, + R212, + R213, + R214, + R215, + R216, + R217, + R218, + R219, + R220, + R221, + R222, + R223, + R224, + R225, + R226, + R227, + R228, + R229, + R230, + R231, + R232, + R233, + R234, + R235, + R236, + R237, + R238, + R239, + R240, + R241, + R242, + R243, + R244, + R245, + R246, + R247, + R248, + R249, + R250, + R251, + R252, + R253, + R254, + RZ, +}; +static_assert(static_cast<int>(Reg::RZ) == 255); + +constexpr size_t NUM_USER_REGS = 255; +constexpr size_t NUM_REGS = 256; + +[[nodiscard]] constexpr Reg operator+(Reg reg, int num) { + if (reg == Reg::RZ) { + // Adding or subtracting registers from RZ yields RZ + return Reg::RZ; + } + const int result{static_cast<int>(reg) + num}; + if (result >= static_cast<int>(Reg::RZ)) { + throw LogicError("Overflow on register arithmetic"); + } + if (result < 0) { + throw LogicError("Underflow on register arithmetic"); + } + return static_cast<Reg>(result); +} + +[[nodiscard]] constexpr Reg operator-(Reg reg, int num) { + return reg + (-num); +} + +constexpr Reg operator++(Reg& reg) { + reg = reg + 1; + return reg; +} + +constexpr Reg operator++(Reg& reg, int) { + const Reg copy{reg}; + reg = reg + 1; + return copy; +} + +[[nodiscard]] constexpr size_t RegIndex(Reg reg) noexcept { + return static_cast<size_t>(reg); +} + +[[nodiscard]] constexpr bool IsAligned(Reg reg, size_t align) { + return RegIndex(reg) % align == 0 || reg == Reg::RZ; +} + +} // namespace Shader::IR + +template <> +struct fmt::formatter<Shader::IR::Reg> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::IR::Reg& reg, FormatContext& ctx) { + if (reg == Shader::IR::Reg::RZ) { + return fmt::format_to(ctx.out(), "RZ"); + } else if (static_cast<int>(reg) >= 0 && 
static_cast<int>(reg) < 255) { + return fmt::format_to(ctx.out(), "R{}", static_cast<int>(reg)); + } else { + throw Shader::LogicError("Invalid register with raw value {}", static_cast<int>(reg)); + } + } +}; diff --git a/src/shader_recompiler/frontend/ir/type.cpp b/src/shader_recompiler/frontend/ir/type.cpp new file mode 100644 index 000000000..f28341bfe --- /dev/null +++ b/src/shader_recompiler/frontend/ir/type.cpp @@ -0,0 +1,38 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <array> +#include <string> + +#include "shader_recompiler/frontend/ir/type.h" + +namespace Shader::IR { + +std::string NameOf(Type type) { + static constexpr std::array names{ + "Opaque", "Label", "Reg", "Pred", "Attribute", "U1", "U8", "U16", "U32", + "U64", "F16", "F32", "F64", "U32x2", "U32x3", "U32x4", "F16x2", "F16x3", + "F16x4", "F32x2", "F32x3", "F32x4", "F64x2", "F64x3", "F64x4", + }; + const size_t bits{static_cast<size_t>(type)}; + if (bits == 0) { + return "Void"; + } + std::string result; + for (size_t i = 0; i < names.size(); i++) { + if ((bits & (size_t{1} << i)) != 0) { + if (!result.empty()) { + result += '|'; + } + result += names[i]; + } + } + return result; +} + +bool AreTypesCompatible(Type lhs, Type rhs) noexcept { + return lhs == rhs || lhs == Type::Opaque || rhs == Type::Opaque; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h new file mode 100644 index 000000000..294b230c4 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/type.h @@ -0,0 +1,61 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <string> + +#include <fmt/format.h> + +#include "common/common_funcs.h" +#include "shader_recompiler/exception.h" + +namespace Shader::IR { + +enum class Type { + Void = 0, + Opaque = 1 << 0, + Reg = 1 << 1, + Pred = 1 << 2, + Attribute = 1 << 3, + Patch = 1 << 4, + U1 = 1 << 5, + U8 = 1 << 6, + U16 = 1 << 7, + U32 = 1 << 8, + U64 = 1 << 9, + F16 = 1 << 10, + F32 = 1 << 11, + F64 = 1 << 12, + U32x2 = 1 << 13, + U32x3 = 1 << 14, + U32x4 = 1 << 15, + F16x2 = 1 << 16, + F16x3 = 1 << 17, + F16x4 = 1 << 18, + F32x2 = 1 << 19, + F32x3 = 1 << 20, + F32x4 = 1 << 21, + F64x2 = 1 << 22, + F64x3 = 1 << 23, + F64x4 = 1 << 24, +}; +DECLARE_ENUM_FLAG_OPERATORS(Type) + +[[nodiscard]] std::string NameOf(Type type); + +[[nodiscard]] bool AreTypesCompatible(Type lhs, Type rhs) noexcept; + +} // namespace Shader::IR + +template <> +struct fmt::formatter<Shader::IR::Type> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::IR::Type& type, FormatContext& ctx) { + return fmt::format_to(ctx.out(), "{}", NameOf(type)); + } +}; diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp new file mode 100644 index 000000000..d365ea1bc --- /dev/null +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -0,0 +1,99 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
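The Type enum above is a set of one-hot bits, which is what lets value.h further down build aliases such as F32F64 out of a bitwise OR and lets AreTypesCompatible and the TypedValue constructor test compatibility with a single AND. A minimal sketch with simplified flags (hand-rolled operators standing in for DECLARE_ENUM_FLAG_OPERATORS, same bit positions as the header):

```cpp
// Sketch: a "one of several types" constraint is a bitwise OR of one-hot bits,
// and checking a concrete type against it is a single AND.
#include <cstdint>
#include <cstdio>

enum class Type : std::uint32_t {
    Void = 0,
    U32 = 1 << 8,
    F32 = 1 << 11,
    F64 = 1 << 12,
};

constexpr Type operator|(Type a, Type b) {
    return static_cast<Type>(static_cast<std::uint32_t>(a) | static_cast<std::uint32_t>(b));
}
constexpr Type operator&(Type a, Type b) {
    return static_cast<Type>(static_cast<std::uint32_t>(a) & static_cast<std::uint32_t>(b));
}

int main() {
    constexpr Type f32f64 = Type::F32 | Type::F64; // the F32F64 alias constraint
    std::printf("F32 accepted: %d\n", (Type::F32 & f32f64) != Type::Void); // 1
    std::printf("U32 accepted: %d\n", (Type::U32 & f32f64) != Type::Void); // 0
}
```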
+ +#include "shader_recompiler/frontend/ir/opcodes.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::IR { + +Value::Value(IR::Inst* value) noexcept : type{Type::Opaque}, inst{value} {} + +Value::Value(IR::Reg value) noexcept : type{Type::Reg}, reg{value} {} + +Value::Value(IR::Pred value) noexcept : type{Type::Pred}, pred{value} {} + +Value::Value(IR::Attribute value) noexcept : type{Type::Attribute}, attribute{value} {} + +Value::Value(IR::Patch value) noexcept : type{Type::Patch}, patch{value} {} + +Value::Value(bool value) noexcept : type{Type::U1}, imm_u1{value} {} + +Value::Value(u8 value) noexcept : type{Type::U8}, imm_u8{value} {} + +Value::Value(u16 value) noexcept : type{Type::U16}, imm_u16{value} {} + +Value::Value(u32 value) noexcept : type{Type::U32}, imm_u32{value} {} + +Value::Value(f32 value) noexcept : type{Type::F32}, imm_f32{value} {} + +Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {} + +Value::Value(f64 value) noexcept : type{Type::F64}, imm_f64{value} {} + +IR::Type Value::Type() const noexcept { + if (IsPhi()) { + // The type of a phi node is stored in its flags + return inst->Flags<IR::Type>(); + } + if (IsIdentity()) { + return inst->Arg(0).Type(); + } + if (type == Type::Opaque) { + return inst->Type(); + } + return type; +} + +bool Value::operator==(const Value& other) const { + if (type != other.type) { + return false; + } + switch (type) { + case Type::Void: + return true; + case Type::Opaque: + return inst == other.inst; + case Type::Reg: + return reg == other.reg; + case Type::Pred: + return pred == other.pred; + case Type::Attribute: + return attribute == other.attribute; + case Type::Patch: + return patch == other.patch; + case Type::U1: + return imm_u1 == other.imm_u1; + case Type::U8: + return imm_u8 == other.imm_u8; + case Type::U16: + case Type::F16: + return imm_u16 == other.imm_u16; + case Type::U32: + case Type::F32: + return imm_u32 == other.imm_u32; + case Type::U64: + case Type::F64: + return imm_u64 == other.imm_u64; + case Type::U32x2: + case Type::U32x3: + case Type::U32x4: + case Type::F16x2: + case Type::F16x3: + case Type::F16x4: + case Type::F32x2: + case Type::F32x3: + case Type::F32x4: + case Type::F64x2: + case Type::F64x3: + case Type::F64x4: + break; + } + throw LogicError("Invalid type {}", type); +} + +bool Value::operator!=(const Value& other) const { + return !operator==(other); +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h new file mode 100644 index 000000000..0c6bf684d --- /dev/null +++ b/src/shader_recompiler/frontend/ir/value.h @@ -0,0 +1,398 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <array> +#include <cstring> +#include <memory> +#include <type_traits> +#include <utility> +#include <vector> + +#include <boost/container/small_vector.hpp> +#include <boost/intrusive/list.hpp> + +#include "common/assert.h" +#include "common/bit_cast.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/attribute.h" +#include "shader_recompiler/frontend/ir/opcodes.h" +#include "shader_recompiler/frontend/ir/patch.h" +#include "shader_recompiler/frontend/ir/pred.h" +#include "shader_recompiler/frontend/ir/reg.h" +#include "shader_recompiler/frontend/ir/type.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::IR { + +class Block; +class Inst; + +struct AssociatedInsts; + +class Value { +public: + Value() noexcept = default; + explicit Value(IR::Inst* value) noexcept; + explicit Value(IR::Reg value) noexcept; + explicit Value(IR::Pred value) noexcept; + explicit Value(IR::Attribute value) noexcept; + explicit Value(IR::Patch value) noexcept; + explicit Value(bool value) noexcept; + explicit Value(u8 value) noexcept; + explicit Value(u16 value) noexcept; + explicit Value(u32 value) noexcept; + explicit Value(f32 value) noexcept; + explicit Value(u64 value) noexcept; + explicit Value(f64 value) noexcept; + + [[nodiscard]] bool IsIdentity() const noexcept; + [[nodiscard]] bool IsPhi() const noexcept; + [[nodiscard]] bool IsEmpty() const noexcept; + [[nodiscard]] bool IsImmediate() const noexcept; + [[nodiscard]] IR::Type Type() const noexcept; + + [[nodiscard]] IR::Inst* Inst() const; + [[nodiscard]] IR::Inst* InstRecursive() const; + [[nodiscard]] IR::Value Resolve() const; + [[nodiscard]] IR::Reg Reg() const; + [[nodiscard]] IR::Pred Pred() const; + [[nodiscard]] IR::Attribute Attribute() const; + [[nodiscard]] IR::Patch Patch() const; + [[nodiscard]] bool U1() const; + [[nodiscard]] u8 U8() const; + [[nodiscard]] u16 U16() const; + [[nodiscard]] u32 U32() const; + [[nodiscard]] f32 F32() const; + [[nodiscard]] u64 U64() const; + [[nodiscard]] f64 F64() const; + + [[nodiscard]] bool operator==(const Value& other) const; + [[nodiscard]] bool operator!=(const Value& other) const; + +private: + IR::Type type{}; + union { + IR::Inst* inst{}; + IR::Reg reg; + IR::Pred pred; + IR::Attribute attribute; + IR::Patch patch; + bool imm_u1; + u8 imm_u8; + u16 imm_u16; + u32 imm_u32; + f32 imm_f32; + u64 imm_u64; + f64 imm_f64; + }; +}; +static_assert(static_cast<u32>(IR::Type::Void) == 0, "memset relies on IR::Type being zero"); +static_assert(std::is_trivially_copyable_v<Value>); + +template <IR::Type type_> +class TypedValue : public Value { +public: + TypedValue() = default; + + template <IR::Type other_type> + requires((other_type & type_) != IR::Type::Void) explicit(false) + TypedValue(const TypedValue<other_type>& value) + : Value(value) {} + + explicit TypedValue(const Value& value) : Value(value) { + if ((value.Type() & type_) == IR::Type::Void) { + throw InvalidArgument("Incompatible types {} and {}", type_, value.Type()); + } + } + + explicit TypedValue(IR::Inst* inst_) : TypedValue(Value(inst_)) {} +}; + +class Inst : public boost::intrusive::list_base_hook<> { +public: + explicit Inst(IR::Opcode op_, u32 flags_) noexcept; + ~Inst(); + + Inst& operator=(const Inst&) = delete; + Inst(const Inst&) = delete; + + Inst& operator=(Inst&&) = delete; + Inst(Inst&&) = delete; + + /// Get the number of uses this instruction has. 
+ [[nodiscard]] int UseCount() const noexcept { + return use_count; + } + + /// Determines whether this instruction has uses or not. + [[nodiscard]] bool HasUses() const noexcept { + return use_count > 0; + } + + /// Get the opcode this microinstruction represents. + [[nodiscard]] IR::Opcode GetOpcode() const noexcept { + return op; + } + + /// Determines if there is a pseudo-operation associated with this instruction. + [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept { + return associated_insts != nullptr; + } + + /// Determines whether or not this instruction may have side effects. + [[nodiscard]] bool MayHaveSideEffects() const noexcept; + + /// Determines whether or not this instruction is a pseudo-instruction. + /// Pseudo-instructions depend on their parent instructions for their semantics. + [[nodiscard]] bool IsPseudoInstruction() const noexcept; + + /// Determines if all arguments of this instruction are immediates. + [[nodiscard]] bool AreAllArgsImmediates() const; + + /// Gets a pseudo-operation associated with this instruction + [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode); + + /// Get the type this instruction returns. + [[nodiscard]] IR::Type Type() const; + + /// Get the number of arguments this instruction has. + [[nodiscard]] size_t NumArgs() const { + return op == IR::Opcode::Phi ? phi_args.size() : NumArgsOf(op); + } + + /// Get the value of a given argument index. + [[nodiscard]] Value Arg(size_t index) const noexcept { + if (op == IR::Opcode::Phi) { + return phi_args[index].second; + } else { + return args[index]; + } + } + + /// Set the value of a given argument index. + void SetArg(size_t index, Value value); + + /// Get a pointer to the block of a phi argument. + [[nodiscard]] Block* PhiBlock(size_t index) const; + /// Add phi operand to a phi instruction. + void AddPhiOperand(Block* predecessor, const Value& value); + + void Invalidate(); + void ClearArgs(); + + void ReplaceUsesWith(Value replacement); + + void ReplaceOpcode(IR::Opcode opcode); + + template <typename FlagsType> + requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>) + [[nodiscard]] FlagsType Flags() const noexcept { + FlagsType ret; + std::memcpy(reinterpret_cast<char*>(&ret), &flags, sizeof(ret)); + return ret; + } + + template <typename FlagsType> + requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>) + [[nodiscard]] void SetFlags(FlagsType value) noexcept { + std::memcpy(&flags, &value, sizeof(value)); + } + + /// Intrusively store the host definition of this instruction. + template <typename DefinitionType> + void SetDefinition(DefinitionType def) { + definition = Common::BitCast<u32>(def); + } + + /// Return the intrusively stored host definition of this instruction. 
+ template <typename DefinitionType> + [[nodiscard]] DefinitionType Definition() const noexcept { + return Common::BitCast<DefinitionType>(definition); + } + + /// Destructively remove one reference count from the instruction + /// Useful for register allocation + void DestructiveRemoveUsage() { + --use_count; + } + + /// Destructively add usages to the instruction + /// Useful for register allocation + void DestructiveAddUsage(int count) { + use_count += count; + } + +private: + struct NonTriviallyDummy { + NonTriviallyDummy() noexcept {} + }; + + void Use(const Value& value); + void UndoUse(const Value& value); + + IR::Opcode op{}; + int use_count{}; + u32 flags{}; + u32 definition{}; + union { + NonTriviallyDummy dummy{}; + boost::container::small_vector<std::pair<Block*, Value>, 2> phi_args; + std::array<Value, 5> args; + }; + std::unique_ptr<AssociatedInsts> associated_insts; +}; +static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased"); + +struct AssociatedInsts { + union { + Inst* in_bounds_inst; + Inst* sparse_inst; + Inst* zero_inst{}; + }; + Inst* sign_inst{}; + Inst* carry_inst{}; + Inst* overflow_inst{}; +}; + +using U1 = TypedValue<Type::U1>; +using U8 = TypedValue<Type::U8>; +using U16 = TypedValue<Type::U16>; +using U32 = TypedValue<Type::U32>; +using U64 = TypedValue<Type::U64>; +using F16 = TypedValue<Type::F16>; +using F32 = TypedValue<Type::F32>; +using F64 = TypedValue<Type::F64>; +using U32U64 = TypedValue<Type::U32 | Type::U64>; +using F32F64 = TypedValue<Type::F32 | Type::F64>; +using U16U32U64 = TypedValue<Type::U16 | Type::U32 | Type::U64>; +using F16F32F64 = TypedValue<Type::F16 | Type::F32 | Type::F64>; +using UAny = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64>; + +inline bool Value::IsIdentity() const noexcept { + return type == Type::Opaque && inst->GetOpcode() == Opcode::Identity; +} + +inline bool Value::IsPhi() const noexcept { + return type == Type::Opaque && inst->GetOpcode() == Opcode::Phi; +} + +inline bool Value::IsEmpty() const noexcept { + return type == Type::Void; +} + +inline bool Value::IsImmediate() const noexcept { + IR::Type current_type{type}; + const IR::Inst* current_inst{inst}; + while (current_type == Type::Opaque && current_inst->GetOpcode() == Opcode::Identity) { + const Value& arg{current_inst->Arg(0)}; + current_type = arg.type; + current_inst = arg.inst; + } + return current_type != Type::Opaque; +} + +inline IR::Inst* Value::Inst() const { + DEBUG_ASSERT(type == Type::Opaque); + return inst; +} + +inline IR::Inst* Value::InstRecursive() const { + DEBUG_ASSERT(type == Type::Opaque); + if (IsIdentity()) { + return inst->Arg(0).InstRecursive(); + } + return inst; +} + +inline IR::Value Value::Resolve() const { + if (IsIdentity()) { + return inst->Arg(0).Resolve(); + } + return *this; +} + +inline IR::Reg Value::Reg() const { + DEBUG_ASSERT(type == Type::Reg); + return reg; +} + +inline IR::Pred Value::Pred() const { + DEBUG_ASSERT(type == Type::Pred); + return pred; +} + +inline IR::Attribute Value::Attribute() const { + DEBUG_ASSERT(type == Type::Attribute); + return attribute; +} + +inline IR::Patch Value::Patch() const { + DEBUG_ASSERT(type == Type::Patch); + return patch; +} + +inline bool Value::U1() const { + if (IsIdentity()) { + return inst->Arg(0).U1(); + } + DEBUG_ASSERT(type == Type::U1); + return imm_u1; +} + +inline u8 Value::U8() const { + if (IsIdentity()) { + return inst->Arg(0).U8(); + } + DEBUG_ASSERT(type == Type::U8); + return imm_u8; +} + +inline u16 Value::U16() const { + if 
(IsIdentity()) { + return inst->Arg(0).U16(); + } + DEBUG_ASSERT(type == Type::U16); + return imm_u16; +} + +inline u32 Value::U32() const { + if (IsIdentity()) { + return inst->Arg(0).U32(); + } + DEBUG_ASSERT(type == Type::U32); + return imm_u32; +} + +inline f32 Value::F32() const { + if (IsIdentity()) { + return inst->Arg(0).F32(); + } + DEBUG_ASSERT(type == Type::F32); + return imm_f32; +} + +inline u64 Value::U64() const { + if (IsIdentity()) { + return inst->Arg(0).U64(); + } + DEBUG_ASSERT(type == Type::U64); + return imm_u64; +} + +inline f64 Value::F64() const { + if (IsIdentity()) { + return inst->Arg(0).F64(); + } + DEBUG_ASSERT(type == Type::F64); + return imm_f64; +} + +[[nodiscard]] inline bool IsPhi(const Inst& inst) { + return inst.GetOpcode() == Opcode::Phi; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp new file mode 100644 index 000000000..1a954a509 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -0,0 +1,642 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <array> +#include <optional> +#include <string> +#include <utility> + +#include <fmt/format.h> + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/frontend/maxwell/decode.h" +#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h" +#include "shader_recompiler/frontend/maxwell/location.h" + +namespace Shader::Maxwell::Flow { +namespace { +struct Compare { + bool operator()(const Block& lhs, Location rhs) const noexcept { + return lhs.begin < rhs; + } + + bool operator()(Location lhs, const Block& rhs) const noexcept { + return lhs < rhs.begin; + } + + bool operator()(const Block& lhs, const Block& rhs) const noexcept { + return lhs.begin < rhs.begin; + } +}; + +u32 BranchOffset(Location pc, Instruction inst) { + return pc.Offset() + static_cast<u32>(inst.branch.Offset()) + 8u; +} + +void Split(Block* old_block, Block* new_block, Location pc) { + if (pc <= old_block->begin || pc >= old_block->end) { + throw InvalidArgument("Invalid address to split={}", pc); + } + *new_block = Block{}; + new_block->begin = pc; + new_block->end = old_block->end; + new_block->end_class = old_block->end_class; + new_block->cond = old_block->cond; + new_block->stack = old_block->stack; + new_block->branch_true = old_block->branch_true; + new_block->branch_false = old_block->branch_false; + new_block->function_call = old_block->function_call; + new_block->return_block = old_block->return_block; + new_block->branch_reg = old_block->branch_reg; + new_block->branch_offset = old_block->branch_offset; + new_block->indirect_branches = std::move(old_block->indirect_branches); + + const Location old_begin{old_block->begin}; + Stack old_stack{std::move(old_block->stack)}; + *old_block = Block{}; + old_block->begin = old_begin; + old_block->end = pc; + old_block->end_class = EndClass::Branch; + old_block->cond = IR::Condition(true); + old_block->stack = old_stack; + old_block->branch_true = new_block; + old_block->branch_false = nullptr; +} + +Token OpcodeToken(Opcode opcode) { + switch (opcode) { + case Opcode::PBK: + case Opcode::BRK: + return Token::PBK; + case Opcode::PCNT: + case Opcode::CONT: + return Token::PBK; + case Opcode::PEXIT: + case Opcode::EXIT: + return Token::PEXIT; + case 
Opcode::PLONGJMP: + case Opcode::LONGJMP: + return Token::PLONGJMP; + case Opcode::PRET: + case Opcode::RET: + case Opcode::CAL: + return Token::PRET; + case Opcode::SSY: + case Opcode::SYNC: + return Token::SSY; + default: + throw InvalidArgument("{}", opcode); + } +} + +bool IsAbsoluteJump(Opcode opcode) { + switch (opcode) { + case Opcode::JCAL: + case Opcode::JMP: + case Opcode::JMX: + return true; + default: + return false; + } +} + +bool HasFlowTest(Opcode opcode) { + switch (opcode) { + case Opcode::BRA: + case Opcode::BRX: + case Opcode::EXIT: + case Opcode::JMP: + case Opcode::JMX: + case Opcode::KIL: + case Opcode::BRK: + case Opcode::CONT: + case Opcode::LONGJMP: + case Opcode::RET: + case Opcode::SYNC: + return true; + case Opcode::CAL: + case Opcode::JCAL: + return false; + default: + throw InvalidArgument("Invalid branch {}", opcode); + } +} + +std::string NameOf(const Block& block) { + if (block.begin.IsVirtual()) { + return fmt::format("\"Virtual {}\"", block.begin); + } else { + return fmt::format("\"{}\"", block.begin); + } +} +} // Anonymous namespace + +void Stack::Push(Token token, Location target) { + entries.push_back({ + .token = token, + .target{target}, + }); +} + +std::pair<Location, Stack> Stack::Pop(Token token) const { + const std::optional<Location> pc{Peek(token)}; + if (!pc) { + throw LogicError("Token could not be found"); + } + return {*pc, Remove(token)}; +} + +std::optional<Location> Stack::Peek(Token token) const { + const auto it{std::find_if(entries.rbegin(), entries.rend(), + [token](const auto& entry) { return entry.token == token; })}; + if (it == entries.rend()) { + return std::nullopt; + } + return it->target; +} + +Stack Stack::Remove(Token token) const { + const auto it{std::find_if(entries.rbegin(), entries.rend(), + [token](const auto& entry) { return entry.token == token; })}; + const auto pos{std::distance(entries.rbegin(), it)}; + Stack result; + result.entries.insert(result.entries.end(), entries.begin(), entries.end() - pos - 1); + return result; +} + +bool Block::Contains(Location pc) const noexcept { + return pc >= begin && pc < end; +} + +Function::Function(ObjectPool<Block>& block_pool, Location start_address) + : entrypoint{start_address} { + Label& label{labels.emplace_back()}; + label.address = start_address; + label.block = block_pool.Create(Block{}); + label.block->begin = start_address; + label.block->end = start_address; + label.block->end_class = EndClass::Branch; + label.block->cond = IR::Condition(true); + label.block->branch_true = nullptr; + label.block->branch_false = nullptr; +} + +CFG::CFG(Environment& env_, ObjectPool<Block>& block_pool_, Location start_address, + bool exits_to_dispatcher_) + : env{env_}, block_pool{block_pool_}, program_start{start_address}, exits_to_dispatcher{ + exits_to_dispatcher_} { + if (exits_to_dispatcher) { + dispatch_block = block_pool.Create(Block{}); + dispatch_block->begin = {}; + dispatch_block->end = {}; + dispatch_block->end_class = EndClass::Exit; + dispatch_block->cond = IR::Condition(true); + dispatch_block->stack = {}; + dispatch_block->branch_true = nullptr; + dispatch_block->branch_false = nullptr; + } + functions.emplace_back(block_pool, start_address); + for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) { + while (!functions[function_id].labels.empty()) { + Function& function{functions[function_id]}; + Label label{function.labels.back()}; + function.labels.pop_back(); + AnalyzeLabel(function_id, label); + } + } + if (exits_to_dispatcher) { + 
const auto last_block{functions[0].blocks.rbegin()}; + dispatch_block->begin = last_block->end + 1; + dispatch_block->end = last_block->end + 1; + functions[0].blocks.insert(*dispatch_block); + } +} + +void CFG::AnalyzeLabel(FunctionId function_id, Label& label) { + if (InspectVisitedBlocks(function_id, label)) { + // Label address has been visited + return; + } + // Try to find the next block + Function* const function{&functions[function_id]}; + Location pc{label.address}; + const auto next_it{function->blocks.upper_bound(pc, Compare{})}; + const bool is_last{next_it == function->blocks.end()}; + Block* const next{is_last ? nullptr : &*next_it}; + // Insert before the next block + Block* const block{label.block}; + // Analyze instructions until it reaches an already visited block or there's a branch + bool is_branch{false}; + while (!next || pc < next->begin) { + is_branch = AnalyzeInst(block, function_id, pc) == AnalysisState::Branch; + if (is_branch) { + break; + } + ++pc; + } + if (!is_branch) { + // If the block finished without a branch, + // it means that the next instruction is already visited, jump to it + block->end = pc; + block->cond = IR::Condition{true}; + block->branch_true = next; + block->branch_false = nullptr; + } + // Function's pointer might be invalid, resolve it again + // Insert the new block + functions[function_id].blocks.insert(*block); +} + +bool CFG::InspectVisitedBlocks(FunctionId function_id, const Label& label) { + const Location pc{label.address}; + Function& function{functions[function_id]}; + const auto it{ + std::ranges::find_if(function.blocks, [pc](auto& block) { return block.Contains(pc); })}; + if (it == function.blocks.end()) { + // Address has not been visited + return false; + } + Block* const visited_block{&*it}; + if (visited_block->begin == pc) { + throw LogicError("Dangling block"); + } + Block* const new_block{label.block}; + Split(visited_block, new_block, pc); + function.blocks.insert(it, *new_block); + return true; +} + +CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Location pc) { + const Instruction inst{env.ReadInstruction(pc.Offset())}; + const Opcode opcode{Decode(inst.raw)}; + switch (opcode) { + case Opcode::BRA: + case Opcode::JMP: + case Opcode::RET: + if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) { + return AnalysisState::Continue; + } + switch (opcode) { + case Opcode::BRA: + case Opcode::JMP: + AnalyzeBRA(block, function_id, pc, inst, IsAbsoluteJump(opcode)); + break; + case Opcode::RET: + block->end_class = EndClass::Return; + break; + default: + break; + } + block->end = pc; + return AnalysisState::Branch; + case Opcode::BRK: + case Opcode::CONT: + case Opcode::LONGJMP: + case Opcode::SYNC: { + if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) { + return AnalysisState::Continue; + } + const auto [stack_pc, new_stack]{block->stack.Pop(OpcodeToken(opcode))}; + block->branch_true = AddLabel(block, new_stack, stack_pc, function_id); + block->end = pc; + return AnalysisState::Branch; + } + case Opcode::KIL: { + const Predicate pred{inst.Pred()}; + const auto ir_pred{static_cast<IR::Pred>(pred.index)}; + const IR::Condition cond{inst.branch.flow_test, ir_pred, pred.negated}; + AnalyzeCondInst(block, function_id, pc, EndClass::Kill, cond); + return AnalysisState::Branch; + } + case Opcode::PBK: + case Opcode::PCNT: + case Opcode::PEXIT: + case Opcode::PLONGJMP: + case Opcode::SSY: + block->stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst)); + return AnalysisState::Continue; + case 
Opcode::BRX: + case Opcode::JMX: + return AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode), function_id); + case Opcode::EXIT: + return AnalyzeEXIT(block, function_id, pc, inst); + case Opcode::PRET: + throw NotImplementedException("PRET flow analysis"); + case Opcode::CAL: + case Opcode::JCAL: { + const bool is_absolute{IsAbsoluteJump(opcode)}; + const Location cal_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)}; + // Technically CAL pushes into PRET, but that's implicit in the function call for us + // Insert the function into the list if it doesn't exist + const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)}; + const bool exists{it != functions.end()}; + const FunctionId call_id{exists ? static_cast<size_t>(std::distance(functions.begin(), it)) + : functions.size()}; + if (!exists) { + functions.emplace_back(block_pool, cal_pc); + } + block->end_class = EndClass::Call; + block->function_call = call_id; + block->return_block = AddLabel(block, block->stack, pc + 1, function_id); + block->end = pc; + return AnalysisState::Branch; + } + default: + break; + } + const Predicate pred{inst.Pred()}; + if (pred == Predicate{true} || pred == Predicate{false}) { + return AnalysisState::Continue; + } + const IR::Condition cond{static_cast<IR::Pred>(pred.index), pred.negated}; + AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond); + return AnalysisState::Branch; +} + +void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, + EndClass insn_end_class, IR::Condition cond) { + if (block->begin != pc) { + // If the block doesn't start in the conditional instruction + // mark it as a label to visit it later + block->end = pc; + block->cond = IR::Condition{true}; + block->branch_true = AddLabel(block, block->stack, pc, function_id); + block->branch_false = nullptr; + return; + } + // Create a virtual block and a conditional block + Block* const conditional_block{block_pool.Create()}; + Block virtual_block{}; + virtual_block.begin = block->begin.Virtual(); + virtual_block.end = block->begin.Virtual(); + virtual_block.end_class = EndClass::Branch; + virtual_block.stack = block->stack; + virtual_block.cond = cond; + virtual_block.branch_true = conditional_block; + virtual_block.branch_false = nullptr; + // Save the contents of the visited block in the conditional block + *conditional_block = std::move(*block); + // Impersonate the visited block with a virtual block + *block = std::move(virtual_block); + // Set the end properties of the conditional instruction + conditional_block->end = pc + 1; + conditional_block->end_class = insn_end_class; + // Add a label to the instruction after the conditional instruction + Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)}; + // Branch to the next instruction from the virtual block + block->branch_false = endif_block; + // And branch to it from the conditional instruction if it is a branch or a kill instruction + // Kill instructions are considered a branch because they demote to a helper invocation and + // execution may continue. 
+ if (insn_end_class == EndClass::Branch || insn_end_class == EndClass::Kill) { + conditional_block->cond = IR::Condition{true}; + conditional_block->branch_true = endif_block; + conditional_block->branch_false = nullptr; + } + // Finally insert the condition block into the list of blocks + functions[function_id].blocks.insert(*conditional_block); +} + +bool CFG::AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst, + Opcode opcode) { + if (inst.branch.is_cbuf) { + throw NotImplementedException("Branch with constant buffer offset"); + } + const Predicate pred{inst.Pred()}; + if (pred == Predicate{false}) { + return false; + } + const bool has_flow_test{HasFlowTest(opcode)}; + const IR::FlowTest flow_test{has_flow_test ? inst.branch.flow_test.Value() : IR::FlowTest::T}; + if (pred != Predicate{true} || flow_test != IR::FlowTest::T) { + block->cond = IR::Condition(flow_test, static_cast<IR::Pred>(pred.index), pred.negated); + block->branch_false = AddLabel(block, block->stack, pc + 1, function_id); + } else { + block->cond = IR::Condition{true}; + } + return true; +} + +void CFG::AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst, + bool is_absolute) { + const Location bra_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)}; + block->branch_true = AddLabel(block, block->stack, bra_pc, function_id); +} + +CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute, + FunctionId function_id) { + const std::optional brx_table{TrackIndirectBranchTable(env, pc, program_start)}; + if (!brx_table) { + TrackIndirectBranchTable(env, pc, program_start); + throw NotImplementedException("Failed to track indirect branch"); + } + const IR::FlowTest flow_test{inst.branch.flow_test}; + const Predicate pred{inst.Pred()}; + if (flow_test != IR::FlowTest::T || pred != Predicate{true}) { + throw NotImplementedException("Conditional indirect branch"); + } + std::vector<u32> targets; + targets.reserve(brx_table->num_entries); + for (u32 i = 0; i < brx_table->num_entries; ++i) { + u32 target{env.ReadCbufValue(brx_table->cbuf_index, brx_table->cbuf_offset + i * 4)}; + if (!is_absolute) { + target += pc.Offset(); + } + target += static_cast<u32>(brx_table->branch_offset); + target += 8; + targets.push_back(target); + } + std::ranges::sort(targets); + targets.erase(std::unique(targets.begin(), targets.end()), targets.end()); + + block->indirect_branches.reserve(targets.size()); + for (const u32 target : targets) { + Block* const branch{AddLabel(block, block->stack, target, function_id)}; + block->indirect_branches.push_back({ + .block = branch, + .address = target, + }); + } + block->cond = IR::Condition{true}; + block->end = pc + 1; + block->end_class = EndClass::IndirectBranch; + block->branch_reg = brx_table->branch_reg; + block->branch_offset = brx_table->branch_offset + 8; + if (!is_absolute) { + block->branch_offset += pc.Offset(); + } + return AnalysisState::Branch; +} + +CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, + Instruction inst) { + const IR::FlowTest flow_test{inst.branch.flow_test}; + const Predicate pred{inst.Pred()}; + if (pred == Predicate{false} || flow_test == IR::FlowTest::F) { + // EXIT will never be taken + return AnalysisState::Continue; + } + if (exits_to_dispatcher && function_id != 0) { + throw NotImplementedException("Dispatch EXIT on external function"); + } + if (pred != Predicate{true} || flow_test != IR::FlowTest::T) { + if 
(block->stack.Peek(Token::PEXIT).has_value()) { + throw NotImplementedException("Conditional EXIT with PEXIT token"); + } + const IR::Condition cond{flow_test, static_cast<IR::Pred>(pred.index), pred.negated}; + if (exits_to_dispatcher) { + block->end = pc; + block->end_class = EndClass::Branch; + block->cond = cond; + block->branch_true = dispatch_block; + block->branch_false = AddLabel(block, block->stack, pc + 1, function_id); + return AnalysisState::Branch; + } + AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond); + return AnalysisState::Branch; + } + if (const std::optional<Location> exit_pc{block->stack.Peek(Token::PEXIT)}) { + const Stack popped_stack{block->stack.Remove(Token::PEXIT)}; + block->cond = IR::Condition{true}; + block->branch_true = AddLabel(block, popped_stack, *exit_pc, function_id); + block->branch_false = nullptr; + return AnalysisState::Branch; + } + if (exits_to_dispatcher) { + block->cond = IR::Condition{true}; + block->end = pc; + block->end_class = EndClass::Branch; + block->branch_true = dispatch_block; + block->branch_false = nullptr; + return AnalysisState::Branch; + } + block->end = pc + 1; + block->end_class = EndClass::Exit; + return AnalysisState::Branch; +} + +Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id) { + Function& function{functions[function_id]}; + if (block->begin == pc) { + // Jumps to itself + return block; + } + if (const auto it{function.blocks.find(pc, Compare{})}; it != function.blocks.end()) { + // Block already exists and it has been visited + if (function.blocks.begin() != it) { + // Check if the previous node is the virtual variant of the label + // This won't exist if a virtual node is not needed or it hasn't been visited + // If it hasn't been visited and a virtual node is needed, this will still behave as + // expected because the node impersonated with its virtual node. 
+ const auto prev{std::prev(it)}; + if (it->begin.Virtual() == prev->begin) { + return &*prev; + } + } + return &*it; + } + // Make sure we don't insert the same layer twice + const auto label_it{std::ranges::find(function.labels, pc, &Label::address)}; + if (label_it != function.labels.end()) { + return label_it->block; + } + Block* const new_block{block_pool.Create()}; + new_block->begin = pc; + new_block->end = pc; + new_block->end_class = EndClass::Branch; + new_block->cond = IR::Condition(true); + new_block->stack = stack; + new_block->branch_true = nullptr; + new_block->branch_false = nullptr; + function.labels.push_back(Label{ + .address{pc}, + .block = new_block, + .stack{std::move(stack)}, + }); + return new_block; +} + +std::string CFG::Dot() const { + int node_uid{0}; + + std::string dot{"digraph shader {\n"}; + for (const Function& function : functions) { + dot += fmt::format("\tsubgraph cluster_{} {{\n", function.entrypoint); + dot += fmt::format("\t\tnode [style=filled];\n"); + for (const Block& block : function.blocks) { + const std::string name{NameOf(block)}; + const auto add_branch = [&](Block* branch, bool add_label) { + dot += fmt::format("\t\t{}->{}", name, NameOf(*branch)); + if (add_label && block.cond != IR::Condition{true} && + block.cond != IR::Condition{false}) { + dot += fmt::format(" [label=\"{}\"]", block.cond); + } + dot += '\n'; + }; + dot += fmt::format("\t\t{};\n", name); + switch (block.end_class) { + case EndClass::Branch: + if (block.cond != IR::Condition{false}) { + add_branch(block.branch_true, true); + } + if (block.cond != IR::Condition{true}) { + add_branch(block.branch_false, false); + } + break; + case EndClass::IndirectBranch: + for (const IndirectBranch& branch : block.indirect_branches) { + add_branch(branch.block, false); + } + break; + case EndClass::Call: + dot += fmt::format("\t\t{}->N{};\n", name, node_uid); + dot += fmt::format("\t\tN{}->{};\n", node_uid, NameOf(*block.return_block)); + dot += fmt::format("\t\tN{} [label=\"Call {}\"][shape=square][style=stripped];\n", + node_uid, block.function_call); + dot += '\n'; + ++node_uid; + break; + case EndClass::Exit: + dot += fmt::format("\t\t{}->N{};\n", name, node_uid); + dot += fmt::format("\t\tN{} [label=\"Exit\"][shape=square][style=stripped];\n", + node_uid); + ++node_uid; + break; + case EndClass::Return: + dot += fmt::format("\t\t{}->N{};\n", name, node_uid); + dot += fmt::format("\t\tN{} [label=\"Return\"][shape=square][style=stripped];\n", + node_uid); + ++node_uid; + break; + case EndClass::Kill: + dot += fmt::format("\t\t{}->N{};\n", name, node_uid); + dot += fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=stripped];\n", + node_uid); + ++node_uid; + break; + } + } + if (function.entrypoint == 8) { + dot += fmt::format("\t\tlabel = \"main\";\n"); + } else { + dot += fmt::format("\t\tlabel = \"Function {}\";\n", function.entrypoint); + } + dot += "\t}\n"; + } + if (!functions.empty()) { + auto& function{functions.front()}; + if (function.blocks.empty()) { + dot += "Start;\n"; + } else { + dot += fmt::format("\tStart -> {};\n", NameOf(*function.blocks.begin())); + } + dot += fmt::format("\tStart [shape=diamond];\n"); + } + dot += "}\n"; + return dot; +} + +} // namespace Shader::Maxwell::Flow diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h new file mode 100644 index 000000000..a6bd3e196 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -0,0 +1,169 @@ +// Copyright 2021 yuzu 
Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <compare> +#include <optional> +#include <span> +#include <string> +#include <vector> + +#include <boost/container/small_vector.hpp> +#include <boost/intrusive/set.hpp> + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/condition.h" +#include "shader_recompiler/frontend/maxwell/instruction.h" +#include "shader_recompiler/frontend/maxwell/location.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::Maxwell::Flow { + +struct Block; + +using FunctionId = size_t; + +enum class EndClass { + Branch, + IndirectBranch, + Call, + Exit, + Return, + Kill, +}; + +enum class Token { + SSY, + PBK, + PEXIT, + PRET, + PCNT, + PLONGJMP, +}; + +struct StackEntry { + auto operator<=>(const StackEntry&) const noexcept = default; + + Token token; + Location target; +}; + +class Stack { +public: + void Push(Token token, Location target); + [[nodiscard]] std::pair<Location, Stack> Pop(Token token) const; + [[nodiscard]] std::optional<Location> Peek(Token token) const; + [[nodiscard]] Stack Remove(Token token) const; + +private: + boost::container::small_vector<StackEntry, 3> entries; +}; + +struct IndirectBranch { + Block* block; + u32 address; +}; + +struct Block : boost::intrusive::set_base_hook< + // Normal link is ~2.5% faster compared to safe link + boost::intrusive::link_mode<boost::intrusive::normal_link>> { + [[nodiscard]] bool Contains(Location pc) const noexcept; + + bool operator<(const Block& rhs) const noexcept { + return begin < rhs.begin; + } + + Location begin; + Location end; + EndClass end_class{}; + IR::Condition cond{}; + Stack stack; + Block* branch_true{}; + Block* branch_false{}; + FunctionId function_call{}; + Block* return_block{}; + IR::Reg branch_reg{}; + s32 branch_offset{}; + std::vector<IndirectBranch> indirect_branches; +}; + +struct Label { + Location address; + Block* block; + Stack stack; +}; + +struct Function { + explicit Function(ObjectPool<Block>& block_pool, Location start_address); + + Location entrypoint; + boost::container::small_vector<Label, 16> labels; + boost::intrusive::set<Block> blocks; +}; + +class CFG { + enum class AnalysisState { + Branch, + Continue, + }; + +public: + explicit CFG(Environment& env, ObjectPool<Block>& block_pool, Location start_address, + bool exits_to_dispatcher = false); + + CFG& operator=(const CFG&) = delete; + CFG(const CFG&) = delete; + + CFG& operator=(CFG&&) = delete; + CFG(CFG&&) = delete; + + [[nodiscard]] std::string Dot() const; + + [[nodiscard]] std::span<const Function> Functions() const noexcept { + return std::span(functions.data(), functions.size()); + } + [[nodiscard]] std::span<Function> Functions() noexcept { + return std::span(functions.data(), functions.size()); + } + + [[nodiscard]] bool ExitsToDispatcher() const { + return exits_to_dispatcher; + } + +private: + void AnalyzeLabel(FunctionId function_id, Label& label); + + /// Inspect already visited blocks. 
+ /// Return true when the block has already been visited + bool InspectVisitedBlocks(FunctionId function_id, const Label& label); + + AnalysisState AnalyzeInst(Block* block, FunctionId function_id, Location pc); + + void AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class, + IR::Condition cond); + + /// Return true when the branch instruction is confirmed to be a branch + bool AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst, + Opcode opcode); + + void AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst, + bool is_absolute); + AnalysisState AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute, + FunctionId function_id); + AnalysisState AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst); + + /// Return the branch target block id + Block* AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id); + + Environment& env; + ObjectPool<Block>& block_pool; + boost::container::small_vector<Function, 1> functions; + Location program_start; + bool exits_to_dispatcher{}; + Block* dispatch_block{}; +}; + +} // namespace Shader::Maxwell::Flow diff --git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp new file mode 100644 index 000000000..972f677dc --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/decode.cpp @@ -0,0 +1,149 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <array> +#include <bit> +#include <memory> +#include <string_view> + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/decode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" + +namespace Shader::Maxwell { +namespace { +struct MaskValue { + u64 mask; + u64 value; +}; + +constexpr MaskValue MaskValueFromEncoding(const char* encoding) { + u64 mask{}; + u64 value{}; + u64 bit{u64(1) << 63}; + while (*encoding) { + switch (*encoding) { + case '0': + mask |= bit; + break; + case '1': + mask |= bit; + value |= bit; + break; + case '-': + break; + case ' ': + break; + default: + throw LogicError("Invalid encoding character '{}'", *encoding); + } + ++encoding; + if (*encoding != ' ') { + bit >>= 1; + } + } + return MaskValue{.mask = mask, .value = value}; +} + +struct InstEncoding { + MaskValue mask_value; + Opcode opcode; +}; +constexpr std::array UNORDERED_ENCODINGS{ +#define INST(name, cute, encode) \ + InstEncoding{ \ + .mask_value{MaskValueFromEncoding(encode)}, \ + .opcode = Opcode::name, \ + }, +#include "maxwell.inc" +#undef INST +}; + +constexpr auto SortedEncodings() { + std::array encodings{UNORDERED_ENCODINGS}; + std::ranges::sort(encodings, [](const InstEncoding& lhs, const InstEncoding& rhs) { + return std::popcount(lhs.mask_value.mask) > std::popcount(rhs.mask_value.mask); + }); + return encodings; +} +constexpr auto ENCODINGS{SortedEncodings()}; + +constexpr int WidestLeftBits() { + int bits{64}; + for (const InstEncoding& encoding : ENCODINGS) { + bits = std::min(bits, std::countr_zero(encoding.mask_value.mask)); + } + return 64 - bits; +} +constexpr int WIDEST_LEFT_BITS{WidestLeftBits()}; +constexpr int MASK_SHIFT{64 - WIDEST_LEFT_BITS}; + +constexpr size_t ToFastLookupIndex(u64 value) { + return static_cast<size_t>(value >> MASK_SHIFT); +} + +constexpr size_t FastLookupSize() { + size_t max_width{}; + 
for (const InstEncoding& encoding : ENCODINGS) { + max_width = std::max(max_width, ToFastLookupIndex(encoding.mask_value.mask)); + } + return max_width + 1; +} +constexpr size_t FAST_LOOKUP_SIZE{FastLookupSize()}; + +struct InstInfo { + [[nodiscard]] u64 Mask() const noexcept { + return static_cast<u64>(high_mask) << MASK_SHIFT; + } + + [[nodiscard]] u64 Value() const noexcept { + return static_cast<u64>(high_value) << MASK_SHIFT; + } + + u16 high_mask; + u16 high_value; + Opcode opcode; +}; + +constexpr auto MakeFastLookupTableIndex(size_t index) { + std::array<InstInfo, 2> encodings{}; + size_t element{}; + for (const auto& encoding : ENCODINGS) { + const size_t mask{ToFastLookupIndex(encoding.mask_value.mask)}; + const size_t value{ToFastLookupIndex(encoding.mask_value.value)}; + if ((index & mask) == value) { + encodings.at(element) = InstInfo{ + .high_mask = static_cast<u16>(encoding.mask_value.mask >> MASK_SHIFT), + .high_value = static_cast<u16>(encoding.mask_value.value >> MASK_SHIFT), + .opcode = encoding.opcode, + }; + ++element; + } + } + return encodings; +} + +/*constexpr*/ auto MakeFastLookupTable() { + auto encodings{std::make_unique<std::array<std::array<InstInfo, 2>, FAST_LOOKUP_SIZE>>()}; + for (size_t index = 0; index < FAST_LOOKUP_SIZE; ++index) { + (*encodings)[index] = MakeFastLookupTableIndex(index); + } + return encodings; +} +const auto FAST_LOOKUP_TABLE{MakeFastLookupTable()}; +} // Anonymous namespace + +Opcode Decode(u64 insn) { + const auto& table{(*FAST_LOOKUP_TABLE)[ToFastLookupIndex(insn)]}; + const auto it{std::ranges::find_if( + table, [insn](const InstInfo& info) { return (insn & info.Mask()) == info.Value(); })}; + if (it == table.end()) { + throw NotImplementedException("Instruction 0x{:016x} is unknown / unimplemented", insn); + } + return it->opcode; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/decode.h b/src/shader_recompiler/frontend/maxwell/decode.h new file mode 100644 index 000000000..b4f080fd7 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/decode.h @@ -0,0 +1,14 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" + +namespace Shader::Maxwell { + +[[nodiscard]] Opcode Decode(u64 insn); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp new file mode 100644 index 000000000..008625cb3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp @@ -0,0 +1,108 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
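+// Walks backwards from a BRX/JMX instruction, within its basic block, to recover the indirect
+// branch table: the LDC that loaded the branch target supplies the constant buffer index and
+// offset, its address register is traced to the SHL that scaled the table index, and the IMNMX
+// that clamped that index gives the last valid entry (num_entries = immediate + 1).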
+ +#include <optional> + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/decode.h" +#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h" + +namespace Shader::Maxwell { +namespace { +union Encoding { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<20, 19, u64> immediate; + BitField<56, 1, u64> is_negative; + BitField<20, 24, s64> brx_offset; +}; + +template <typename Callable> +std::optional<u64> Track(Environment& env, Location block_begin, Location& pos, Callable&& func) { + while (pos >= block_begin) { + const u64 insn{env.ReadInstruction(pos.Offset())}; + --pos; + if (func(insn, Decode(insn))) { + return insn; + } + } + return std::nullopt; +} + +std::optional<u64> TrackLDC(Environment& env, Location block_begin, Location& pos, + IR::Reg brx_reg) { + return Track(env, block_begin, pos, [brx_reg](u64 insn, Opcode opcode) { + const LDC::Encoding ldc{insn}; + return opcode == Opcode::LDC && ldc.dest_reg == brx_reg && ldc.size == LDC::Size::B32 && + ldc.mode == LDC::Mode::Default; + }); +} + +std::optional<u64> TrackSHL(Environment& env, Location block_begin, Location& pos, + IR::Reg ldc_reg) { + return Track(env, block_begin, pos, [ldc_reg](u64 insn, Opcode opcode) { + const Encoding shl{insn}; + return opcode == Opcode::SHL_imm && shl.dest_reg == ldc_reg; + }); +} + +std::optional<u64> TrackIMNMX(Environment& env, Location block_begin, Location& pos, + IR::Reg shl_reg) { + return Track(env, block_begin, pos, [shl_reg](u64 insn, Opcode opcode) { + const Encoding imnmx{insn}; + return opcode == Opcode::IMNMX_imm && imnmx.dest_reg == shl_reg; + }); +} +} // Anonymous namespace + +std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env, Location brx_pos, + Location block_begin) { + const u64 brx_insn{env.ReadInstruction(brx_pos.Offset())}; + const Opcode brx_opcode{Decode(brx_insn)}; + if (brx_opcode != Opcode::BRX && brx_opcode != Opcode::JMX) { + throw LogicError("Tracked instruction is not BRX or JMX"); + } + const IR::Reg brx_reg{Encoding{brx_insn}.src_reg}; + const s32 brx_offset{static_cast<s32>(Encoding{brx_insn}.brx_offset)}; + + Location pos{brx_pos}; + const std::optional<u64> ldc_insn{TrackLDC(env, block_begin, pos, brx_reg)}; + if (!ldc_insn) { + return std::nullopt; + } + const LDC::Encoding ldc{*ldc_insn}; + const u32 cbuf_index{static_cast<u32>(ldc.index)}; + const u32 cbuf_offset{static_cast<u32>(static_cast<s32>(ldc.offset.Value()))}; + const IR::Reg ldc_reg{ldc.src_reg}; + + const std::optional<u64> shl_insn{TrackSHL(env, block_begin, pos, ldc_reg)}; + if (!shl_insn) { + return std::nullopt; + } + const Encoding shl{*shl_insn}; + const IR::Reg shl_reg{shl.src_reg}; + + const std::optional<u64> imnmx_insn{TrackIMNMX(env, block_begin, pos, shl_reg)}; + if (!imnmx_insn) { + return std::nullopt; + } + const Encoding imnmx{*imnmx_insn}; + if (imnmx.is_negative != 0) { + return std::nullopt; + } + const u32 imnmx_immediate{static_cast<u32>(imnmx.immediate.Value())}; + return IndirectBranchTableInfo{ + .cbuf_index = cbuf_index, + .cbuf_offset = cbuf_offset, + .num_entries = imnmx_immediate + 1, + .branch_offset = brx_offset, + .branch_reg = brx_reg, + }; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h 
b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h new file mode 100644 index 000000000..eee5102fa --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h @@ -0,0 +1,28 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <optional> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/reg.h" +#include "shader_recompiler/frontend/maxwell/location.h" + +namespace Shader::Maxwell { + +struct IndirectBranchTableInfo { + u32 cbuf_index{}; + u32 cbuf_offset{}; + u32 num_entries{}; + s32 branch_offset{}; + IR::Reg branch_reg{}; +}; + +std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env, Location brx_pos, + Location block_begin); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/instruction.h b/src/shader_recompiler/frontend/maxwell/instruction.h new file mode 100644 index 000000000..743d68d61 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/instruction.h @@ -0,0 +1,63 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/flow_test.h" +#include "shader_recompiler/frontend/ir/reg.h" + +namespace Shader::Maxwell { + +struct Predicate { + Predicate() = default; + Predicate(unsigned index_, bool negated_ = false) : index{index_}, negated{negated_} {} + Predicate(bool value) : index{7}, negated{!value} {} + Predicate(u64 raw) : index{static_cast<unsigned>(raw & 7)}, negated{(raw & 8) != 0} {} + + unsigned index; + bool negated; +}; + +inline bool operator==(const Predicate& lhs, const Predicate& rhs) noexcept { + return lhs.index == rhs.index && lhs.negated == rhs.negated; +} + +inline bool operator!=(const Predicate& lhs, const Predicate& rhs) noexcept { + return !(lhs == rhs); +} + +union Instruction { + Instruction(u64 raw_) : raw{raw_} {} + + u64 raw; + + union { + BitField<5, 1, u64> is_cbuf; + BitField<0, 5, IR::FlowTest> flow_test; + + [[nodiscard]] u32 Absolute() const noexcept { + return static_cast<u32>(absolute); + } + + [[nodiscard]] s32 Offset() const noexcept { + return static_cast<s32>(offset); + } + + private: + BitField<20, 24, s64> offset; + BitField<20, 32, u64> absolute; + } branch; + + [[nodiscard]] Predicate Pred() const noexcept { + return Predicate{pred}; + } + +private: + BitField<16, 4, u64> pred; +}; +static_assert(std::is_trivially_copyable_v<Instruction>); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/location.h b/src/shader_recompiler/frontend/maxwell/location.h new file mode 100644 index 000000000..26d29eae2 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/location.h @@ -0,0 +1,112 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
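+// Location wraps a program byte offset that is always a multiple of 8. Align/Step/Back skip
+// offsets that are multiples of 32, i.e. the first 8-byte slot of every 32-byte bundle
+// (presumably the Maxwell scheduling/control word), and Virtual() biases the offset by 4 so
+// synthetic locations can never collide with real instruction addresses.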
+ +#pragma once + +#include <compare> +#include <iterator> + +#include <fmt/format.h> + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" + +namespace Shader::Maxwell { + +class Location { + static constexpr u32 VIRTUAL_BIAS{4}; + +public: + constexpr Location() = default; + + constexpr Location(u32 initial_offset) : offset{initial_offset} { + if (initial_offset % 8 != 0) { + throw InvalidArgument("initial_offset={} is not a multiple of 8", initial_offset); + } + Align(); + } + + constexpr Location Virtual() const noexcept { + Location virtual_location; + virtual_location.offset = offset - VIRTUAL_BIAS; + return virtual_location; + } + + [[nodiscard]] constexpr u32 Offset() const noexcept { + return offset; + } + + [[nodiscard]] constexpr bool IsVirtual() const { + return offset % 8 == VIRTUAL_BIAS; + } + + constexpr auto operator<=>(const Location&) const noexcept = default; + + constexpr Location operator++() noexcept { + const Location copy{*this}; + Step(); + return copy; + } + + constexpr Location operator++(int) noexcept { + Step(); + return *this; + } + + constexpr Location operator--() noexcept { + const Location copy{*this}; + Back(); + return copy; + } + + constexpr Location operator--(int) noexcept { + Back(); + return *this; + } + + constexpr Location operator+(int number) const { + Location new_pc{*this}; + while (number > 0) { + --number; + ++new_pc; + } + while (number < 0) { + ++number; + --new_pc; + } + return new_pc; + } + + constexpr Location operator-(int number) const { + return operator+(-number); + } + +private: + constexpr void Align() { + offset += offset % 32 == 0 ? 8 : 0; + } + + constexpr void Step() { + offset += 8 + (offset % 32 == 24 ? 8 : 0); + } + + constexpr void Back() { + offset -= 8 + (offset % 32 == 8 ? 8 : 0); + } + + u32 offset{0xcccccccc}; +}; + +} // namespace Shader::Maxwell + +template <> +struct fmt::formatter<Shader::Maxwell::Location> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::Maxwell::Location& location, FormatContext& ctx) { + return fmt::format_to(ctx.out(), "{:04x}", location.Offset()); + } +}; diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc new file mode 100644 index 000000000..2fee591bb --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -0,0 +1,286 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
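+// Encoding strings are parsed by MaskValueFromEncoding in decode.cpp: characters are consumed
+// from bit 63 downwards, '0'/'1' are fixed opcode bits, '-' marks a don't-care bit and spaces
+// are ignored. For example, "1110 1111 1010 0---" fixes bits 63:51 and yields
+// mask = 0xFFF8000000000000, value = 0xEFA0000000000000.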
+ +INST(AL2P, "AL2P", "1110 1111 1010 0---") +INST(ALD, "ALD", "1110 1111 1101 1---") +INST(AST, "AST", "1110 1111 1111 0---") +INST(ATOM_cas, "ATOM (cas)", "1110 1110 1111 ----") +INST(ATOM, "ATOM", "1110 1101 ---- ----") +INST(ATOMS_cas, "ATOMS (cas)", "1110 1110 ---- ----") +INST(ATOMS, "ATOMS", "1110 1100 ---- ----") +INST(B2R, "B2R", "1111 0000 1011 1---") +INST(BAR, "BAR", "1111 0000 1010 1---") +INST(BFE_reg, "BFE (reg)", "0101 1100 0000 0---") +INST(BFE_cbuf, "BFE (cbuf)", "0100 1100 0000 0---") +INST(BFE_imm, "BFE (imm)", "0011 100- 0000 0---") +INST(BFI_reg, "BFI (reg)", "0101 1011 1111 0---") +INST(BFI_rc, "BFI (rc)", "0101 0011 1111 0---") +INST(BFI_cr, "BFI (cr)", "0100 1011 1111 0---") +INST(BFI_imm, "BFI (imm)", "0011 011- 1111 0---") +INST(BPT, "BPT", "1110 0011 1010 ----") +INST(BRA, "BRA", "1110 0010 0100 ----") +INST(BRK, "BRK", "1110 0011 0100 ----") +INST(BRX, "BRX", "1110 0010 0101 ----") +INST(CAL, "CAL", "1110 0010 0110 ----") +INST(CCTL, "CCTL", "1110 1111 011- ----") +INST(CCTLL, "CCTLL", "1110 1111 100- ----") +INST(CONT, "CONT", "1110 0011 0101 ----") +INST(CS2R, "CS2R", "0101 0000 1100 1---") +INST(CSET, "CSET", "0101 0000 1001 1---") +INST(CSETP, "CSETP", "0101 0000 1010 0---") +INST(DADD_reg, "DADD (reg)", "0101 1100 0111 0---") +INST(DADD_cbuf, "DADD (cbuf)", "0100 1100 0111 0---") +INST(DADD_imm, "DADD (imm)", "0011 100- 0111 0---") +INST(DEPBAR, "DEPBAR", "1111 0000 1111 0---") +INST(DFMA_reg, "DFMA (reg)", "0101 1011 0111 ----") +INST(DFMA_rc, "DFMA (rc)", "0101 0011 0111 ----") +INST(DFMA_cr, "DFMA (cr)", "0100 1011 0111 ----") +INST(DFMA_imm, "DFMA (imm)", "0011 011- 0111 ----") +INST(DMNMX_reg, "DMNMX (reg)", "0101 1100 0101 0---") +INST(DMNMX_cbuf, "DMNMX (cbuf)", "0100 1100 0101 0---") +INST(DMNMX_imm, "DMNMX (imm)", "0011 100- 0101 0---") +INST(DMUL_reg, "DMUL (reg)", "0101 1100 1000 0---") +INST(DMUL_cbuf, "DMUL (cbuf)", "0100 1100 1000 0---") +INST(DMUL_imm, "DMUL (imm)", "0011 100- 1000 0---") +INST(DSET_reg, "DSET (reg)", "0101 1001 0--- ----") +INST(DSET_cbuf, "DSET (cbuf)", "0100 1001 0--- ----") +INST(DSET_imm, "DSET (imm)", "0011 001- 0--- ----") +INST(DSETP_reg, "DSETP (reg)", "0101 1011 1000 ----") +INST(DSETP_cbuf, "DSETP (cbuf)", "0100 1011 1000 ----") +INST(DSETP_imm, "DSETP (imm)", "0011 011- 1000 ----") +INST(EXIT, "EXIT", "1110 0011 0000 ----") +INST(F2F_reg, "F2F (reg)", "0101 1100 1010 1---") +INST(F2F_cbuf, "F2F (cbuf)", "0100 1100 1010 1---") +INST(F2F_imm, "F2F (imm)", "0011 100- 1010 1---") +INST(F2I_reg, "F2I (reg)", "0101 1100 1011 0---") +INST(F2I_cbuf, "F2I (cbuf)", "0100 1100 1011 0---") +INST(F2I_imm, "F2I (imm)", "0011 100- 1011 0---") +INST(FADD_reg, "FADD (reg)", "0101 1100 0101 1---") +INST(FADD_cbuf, "FADD (cbuf)", "0100 1100 0101 1---") +INST(FADD_imm, "FADD (imm)", "0011 100- 0101 1---") +INST(FADD32I, "FADD32I", "0000 10-- ---- ----") +INST(FCHK_reg, "FCHK (reg)", "0101 1100 1000 1---") +INST(FCHK_cbuf, "FCHK (cbuf)", "0100 1100 1000 1---") +INST(FCHK_imm, "FCHK (imm)", "0011 100- 1000 1---") +INST(FCMP_reg, "FCMP (reg)", "0101 1011 1010 ----") +INST(FCMP_rc, "FCMP (rc)", "0101 0011 1010 ----") +INST(FCMP_cr, "FCMP (cr)", "0100 1011 1010 ----") +INST(FCMP_imm, "FCMP (imm)", "0011 011- 1010 ----") +INST(FFMA_reg, "FFMA (reg)", "0101 1001 1--- ----") +INST(FFMA_rc, "FFMA (rc)", "0101 0001 1--- ----") +INST(FFMA_cr, "FFMA (cr)", "0100 1001 1--- ----") +INST(FFMA_imm, "FFMA (imm)", "0011 001- 1--- ----") +INST(FFMA32I, "FFMA32I", "0000 11-- ---- ----") +INST(FLO_reg, "FLO (reg)", "0101 1100 0011 0---") +INST(FLO_cbuf, 
"FLO (cbuf)", "0100 1100 0011 0---") +INST(FLO_imm, "FLO (imm)", "0011 100- 0011 0---") +INST(FMNMX_reg, "FMNMX (reg)", "0101 1100 0110 0---") +INST(FMNMX_cbuf, "FMNMX (cbuf)", "0100 1100 0110 0---") +INST(FMNMX_imm, "FMNMX (imm)", "0011 100- 0110 0---") +INST(FMUL_reg, "FMUL (reg)", "0101 1100 0110 1---") +INST(FMUL_cbuf, "FMUL (cbuf)", "0100 1100 0110 1---") +INST(FMUL_imm, "FMUL (imm)", "0011 100- 0110 1---") +INST(FMUL32I, "FMUL32I", "0001 1110 ---- ----") +INST(FSET_reg, "FSET (reg)", "0101 1000 ---- ----") +INST(FSET_cbuf, "FSET (cbuf)", "0100 1000 ---- ----") +INST(FSET_imm, "FSET (imm)", "0011 000- ---- ----") +INST(FSETP_reg, "FSETP (reg)", "0101 1011 1011 ----") +INST(FSETP_cbuf, "FSETP (cbuf)", "0100 1011 1011 ----") +INST(FSETP_imm, "FSETP (imm)", "0011 011- 1011 ----") +INST(FSWZADD, "FSWZADD", "0101 0000 1111 1---") +INST(GETCRSPTR, "GETCRSPTR", "1110 0010 1100 ----") +INST(GETLMEMBASE, "GETLMEMBASE", "1110 0010 1101 ----") +INST(HADD2_reg, "HADD2 (reg)", "0101 1101 0001 0---") +INST(HADD2_cbuf, "HADD2 (cbuf)", "0111 101- 1--- ----") +INST(HADD2_imm, "HADD2 (imm)", "0111 101- 0--- ----") +INST(HADD2_32I, "HADD2_32I", "0010 110- ---- ----") +INST(HFMA2_reg, "HFMA2 (reg)", "0101 1101 0000 0---") +INST(HFMA2_rc, "HFMA2 (rc)", "0110 0--- 1--- ----") +INST(HFMA2_cr, "HFMA2 (cr)", "0111 0--- 1--- ----") +INST(HFMA2_imm, "HFMA2 (imm)", "0111 0--- 0--- ----") +INST(HFMA2_32I, "HFMA2_32I", "0010 100- ---- ----") +INST(HMUL2_reg, "HMUL2 (reg)", "0101 1101 0000 1---") +INST(HMUL2_cbuf, "HMUL2 (cbuf)", "0111 100- 1--- ----") +INST(HMUL2_imm, "HMUL2 (imm)", "0111 100- 0--- ----") +INST(HMUL2_32I, "HMUL2_32I", "0010 101- ---- ----") +INST(HSET2_reg, "HSET2 (reg)", "0101 1101 0001 1---") +INST(HSET2_cbuf, "HSET2 (cbuf)", "0111 110- 1--- ----") +INST(HSET2_imm, "HSET2 (imm)", "0111 110- 0--- ----") +INST(HSETP2_reg, "HSETP2 (reg)", "0101 1101 0010 0---") +INST(HSETP2_cbuf, "HSETP2 (cbuf)", "0111 111- 1--- ----") +INST(HSETP2_imm, "HSETP2 (imm)", "0111 111- 0--- ----") +INST(I2F_reg, "I2F (reg)", "0101 1100 1011 1---") +INST(I2F_cbuf, "I2F (cbuf)", "0100 1100 1011 1---") +INST(I2F_imm, "I2F (imm)", "0011 100- 1011 1---") +INST(I2I_reg, "I2I (reg)", "0101 1100 1110 0---") +INST(I2I_cbuf, "I2I (cbuf)", "0100 1100 1110 0---") +INST(I2I_imm, "I2I (imm)", "0011 100- 1110 0---") +INST(IADD_reg, "IADD (reg)", "0101 1100 0001 0---") +INST(IADD_cbuf, "IADD (cbuf)", "0100 1100 0001 0---") +INST(IADD_imm, "IADD (imm)", "0011 100- 0001 0---") +INST(IADD3_reg, "IADD3 (reg)", "0101 1100 1100 ----") +INST(IADD3_cbuf, "IADD3 (cbuf)", "0100 1100 1100 ----") +INST(IADD3_imm, "IADD3 (imm)", "0011 100- 1100 ----") +INST(IADD32I, "IADD32I", "0001 110- ---- ----") +INST(ICMP_reg, "ICMP (reg)", "0101 1011 0100 ----") +INST(ICMP_rc, "ICMP (rc)", "0101 0011 0100 ----") +INST(ICMP_cr, "ICMP (cr)", "0100 1011 0100 ----") +INST(ICMP_imm, "ICMP (imm)", "0011 011- 0100 ----") +INST(IDE, "IDE", "1110 0011 1001 ----") +INST(IDP_reg, "IDP (reg)", "0101 0011 1111 1---") +INST(IDP_imm, "IDP (imm)", "0101 0011 1101 1---") +INST(IMAD_reg, "IMAD (reg)", "0101 1010 0--- ----") +INST(IMAD_rc, "IMAD (rc)", "0101 0010 0--- ----") +INST(IMAD_cr, "IMAD (cr)", "0100 1010 0--- ----") +INST(IMAD_imm, "IMAD (imm)", "0011 010- 0--- ----") +INST(IMAD32I, "IMAD32I", "1000 00-- ---- ----") +INST(IMADSP_reg, "IMADSP (reg)", "0101 1010 1--- ----") +INST(IMADSP_rc, "IMADSP (rc)", "0101 0010 1--- ----") +INST(IMADSP_cr, "IMADSP (cr)", "0100 1010 1--- ----") +INST(IMADSP_imm, "IMADSP (imm)", "0011 010- 1--- ----") +INST(IMNMX_reg, "IMNMX (reg)", 
"0101 1100 0010 0---") +INST(IMNMX_cbuf, "IMNMX (cbuf)", "0100 1100 0010 0---") +INST(IMNMX_imm, "IMNMX (imm)", "0011 100- 0010 0---") +INST(IMUL_reg, "IMUL (reg)", "0101 1100 0011 1---") +INST(IMUL_cbuf, "IMUL (cbuf)", "0100 1100 0011 1---") +INST(IMUL_imm, "IMUL (imm)", "0011 100- 0011 1---") +INST(IMUL32I, "IMUL32I", "0001 1111 ---- ----") +INST(IPA, "IPA", "1110 0000 ---- ----") +INST(ISBERD, "ISBERD", "1110 1111 1101 0---") +INST(ISCADD_reg, "ISCADD (reg)", "0101 1100 0001 1---") +INST(ISCADD_cbuf, "ISCADD (cbuf)", "0100 1100 0001 1---") +INST(ISCADD_imm, "ISCADD (imm)", "0011 100- 0001 1---") +INST(ISCADD32I, "ISCADD32I", "0001 01-- ---- ----") +INST(ISET_reg, "ISET (reg)", "0101 1011 0101 ----") +INST(ISET_cbuf, "ISET (cbuf)", "0100 1011 0101 ----") +INST(ISET_imm, "ISET (imm)", "0011 011- 0101 ----") +INST(ISETP_reg, "ISETP (reg)", "0101 1011 0110 ----") +INST(ISETP_cbuf, "ISETP (cbuf)", "0100 1011 0110 ----") +INST(ISETP_imm, "ISETP (imm)", "0011 011- 0110 ----") +INST(JCAL, "JCAL", "1110 0010 0010 ----") +INST(JMP, "JMP", "1110 0010 0001 ----") +INST(JMX, "JMX", "1110 0010 0000 ----") +INST(KIL, "KIL", "1110 0011 0011 ----") +INST(LD, "LD", "100- ---- ---- ----") +INST(LDC, "LDC", "1110 1111 1001 0---") +INST(LDG, "LDG", "1110 1110 1101 0---") +INST(LDL, "LDL", "1110 1111 0100 0---") +INST(LDS, "LDS", "1110 1111 0100 1---") +INST(LEA_hi_reg, "LEA (hi reg)", "0101 1011 1101 1---") +INST(LEA_hi_cbuf, "LEA (hi cbuf)", "0001 10-- ---- ----") +INST(LEA_lo_reg, "LEA (lo reg)", "0101 1011 1101 0---") +INST(LEA_lo_cbuf, "LEA (lo cbuf)", "0100 1011 1101 ----") +INST(LEA_lo_imm, "LEA (lo imm)", "0011 011- 1101 0---") +INST(LEPC, "LEPC", "0101 0000 1101 0---") +INST(LONGJMP, "LONGJMP", "1110 0011 0001 ----") +INST(LOP_reg, "LOP (reg)", "0101 1100 0100 0---") +INST(LOP_cbuf, "LOP (cbuf)", "0100 1100 0100 0---") +INST(LOP_imm, "LOP (imm)", "0011 100- 0100 0---") +INST(LOP3_reg, "LOP3 (reg)", "0101 1011 1110 0---") +INST(LOP3_cbuf, "LOP3 (cbuf)", "0000 001- ---- ----") +INST(LOP3_imm, "LOP3 (imm)", "0011 11-- ---- ----") +INST(LOP32I, "LOP32I", "0000 01-- ---- ----") +INST(MEMBAR, "MEMBAR", "1110 1111 1001 1---") +INST(MOV_reg, "MOV (reg)", "0101 1100 1001 1---") +INST(MOV_cbuf, "MOV (cbuf)", "0100 1100 1001 1---") +INST(MOV_imm, "MOV (imm)", "0011 100- 1001 1---") +INST(MOV32I, "MOV32I", "0000 0001 0000 ----") +INST(MUFU, "MUFU", "0101 0000 1000 0---") +INST(NOP, "NOP", "0101 0000 1011 0---") +INST(OUT_reg, "OUT (reg)", "1111 1011 1110 0---") +INST(OUT_cbuf, "OUT (cbuf)", "1110 1011 1110 0---") +INST(OUT_imm, "OUT (imm)", "1111 011- 1110 0---") +INST(P2R_reg, "P2R (reg)", "0101 1100 1110 1---") +INST(P2R_cbuf, "P2R (cbuf)", "0100 1100 1110 1---") +INST(P2R_imm, "P2R (imm)", "0011 1000 1110 1---") +INST(PBK, "PBK", "1110 0010 1010 ----") +INST(PCNT, "PCNT", "1110 0010 1011 ----") +INST(PEXIT, "PEXIT", "1110 0010 0011 ----") +INST(PIXLD, "PIXLD", "1110 1111 1110 1---") +INST(PLONGJMP, "PLONGJMP", "1110 0010 1000 ----") +INST(POPC_reg, "POPC (reg)", "0101 1100 0000 1---") +INST(POPC_cbuf, "POPC (cbuf)", "0100 1100 0000 1---") +INST(POPC_imm, "POPC (imm)", "0011 100- 0000 1---") +INST(PRET, "PRET", "1110 0010 0111 ----") +INST(PRMT_reg, "PRMT (reg)", "0101 1011 1100 ----") +INST(PRMT_rc, "PRMT (rc)", "0101 0011 1100 ----") +INST(PRMT_cr, "PRMT (cr)", "0100 1011 1100 ----") +INST(PRMT_imm, "PRMT (imm)", "0011 011- 1100 ----") +INST(PSET, "PSET", "0101 0000 1000 1---") +INST(PSETP, "PSETP", "0101 0000 1001 0---") +INST(R2B, "R2B", "1111 0000 1100 0---") +INST(R2P_reg, "R2P (reg)", "0101 1100 1111 
0---") +INST(R2P_cbuf, "R2P (cbuf)", "0100 1100 1111 0---") +INST(R2P_imm, "R2P (imm)", "0011 100- 1111 0---") +INST(RAM, "RAM", "1110 0011 1000 ----") +INST(RED, "RED", "1110 1011 1111 1---") +INST(RET, "RET", "1110 0011 0010 ----") +INST(RRO_reg, "RRO (reg)", "0101 1100 1001 0---") +INST(RRO_cbuf, "RRO (cbuf)", "0100 1100 1001 0---") +INST(RRO_imm, "RRO (imm)", "0011 100- 1001 0---") +INST(RTT, "RTT", "1110 0011 0110 ----") +INST(S2R, "S2R", "1111 0000 1100 1---") +INST(SAM, "SAM", "1110 0011 0111 ----") +INST(SEL_reg, "SEL (reg)", "0101 1100 1010 0---") +INST(SEL_cbuf, "SEL (cbuf)", "0100 1100 1010 0---") +INST(SEL_imm, "SEL (imm)", "0011 100- 1010 0---") +INST(SETCRSPTR, "SETCRSPTR", "1110 0010 1110 ----") +INST(SETLMEMBASE, "SETLMEMBASE", "1110 0010 1111 ----") +INST(SHF_l_reg, "SHF (l reg)", "0101 1011 1111 1---") +INST(SHF_l_imm, "SHF (l imm)", "0011 011- 1111 1---") +INST(SHF_r_reg, "SHF (r reg)", "0101 1100 1111 1---") +INST(SHF_r_imm, "SHF (r imm)", "0011 100- 1111 1---") +INST(SHFL, "SHFL", "1110 1111 0001 0---") +INST(SHL_reg, "SHL (reg)", "0101 1100 0100 1---") +INST(SHL_cbuf, "SHL (cbuf)", "0100 1100 0100 1---") +INST(SHL_imm, "SHL (imm)", "0011 100- 0100 1---") +INST(SHR_reg, "SHR (reg)", "0101 1100 0010 1---") +INST(SHR_cbuf, "SHR (cbuf)", "0100 1100 0010 1---") +INST(SHR_imm, "SHR (imm)", "0011 100- 0010 1---") +INST(SSY, "SSY", "1110 0010 1001 ----") +INST(ST, "ST", "101- ---- ---- ----") +INST(STG, "STG", "1110 1110 1101 1---") +INST(STL, "STL", "1110 1111 0101 0---") +INST(STP, "STP", "1110 1110 1010 0---") +INST(STS, "STS", "1110 1111 0101 1---") +INST(SUATOM, "SUATOM", "1110 1010 0--- ----") +INST(SUATOM_cas, "SUATOM_cas", "1110 1010 1--- ----") +INST(SULD, "SULD", "1110 1011 000- ----") +INST(SURED, "SURED", "1110 1011 010- ----") +INST(SUST, "SUST", "1110 1011 001- ----") +INST(SYNC, "SYNC", "1111 0000 1111 1---") +INST(TEX, "TEX", "1100 0--- ---- ----") +INST(TEX_b, "TEX (b)", "1101 1110 10-- ----") +INST(TEXS, "TEXS", "1101 -00- ---- ----") +INST(TLD, "TLD", "1101 1100 ---- ----") +INST(TLD_b, "TLD (b)", "1101 1101 ---- ----") +INST(TLD4, "TLD4", "1100 10-- ---- ----") +INST(TLD4_b, "TLD4 (b)", "1101 1110 11-- ----") +INST(TLD4S, "TLD4S", "1101 1111 -0-- ----") +INST(TLDS, "TLDS", "1101 -01- ---- ----") +INST(TMML, "TMML", "1101 1111 0101 1---") +INST(TMML_b, "TMML (b)", "1101 1111 0110 0---") +INST(TXA, "TXA", "1101 1111 0100 0---") +INST(TXD, "TXD", "1101 1110 00-- ----") +INST(TXD_b, "TXD (b)", "1101 1110 01-- ----") +INST(TXQ, "TXQ", "1101 1111 0100 1---") +INST(TXQ_b, "TXQ (b)", "1101 1111 0101 0---") +INST(VABSDIFF, "VABSDIFF", "0101 0100 ---- ----") +INST(VABSDIFF4, "VABSDIFF4", "0101 0000 0--- ----") +INST(VADD, "VADD", "0010 00-- ---- ----") +INST(VMAD, "VMAD", "0101 1111 ---- ----") +INST(VMNMX, "VMNMX", "0011 101- ---- ----") +INST(VOTE, "VOTE", "0101 0000 1101 1---") +INST(VOTE_vtg, "VOTE (vtg)", "0101 0000 1110 0---") +INST(VSET, "VSET", "0100 000- ---- ----") +INST(VSETP, "VSETP", "0101 0000 1111 0---") +INST(VSHL, "VSHL", "0101 0111 ---- ----") +INST(VSHR, "VSHR", "0101 0110 ---- ----") +INST(XMAD_reg, "XMAD (reg)", "0101 1011 00-- ----") +INST(XMAD_rc, "XMAD (rc)", "0101 0001 0--- ----") +INST(XMAD_cr, "XMAD (cr)", "0100 111- ---- ----") +INST(XMAD_imm, "XMAD (imm)", "0011 011- 00-- ----") + +// Removed due to its weird formatting making fast tables larger +// INST(CCTLT, "CCTLT", "1110 1011 1111 0--0") diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.cpp b/src/shader_recompiler/frontend/maxwell/opcodes.cpp new file mode 100644 index 
000000000..ccc40c20c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/opcodes.cpp @@ -0,0 +1,26 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <array> + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" + +namespace Shader::Maxwell { +namespace { +constexpr std::array NAME_TABLE{ +#define INST(name, cute, encode) cute, +#include "maxwell.inc" +#undef INST +}; +} // Anonymous namespace + +const char* NameOf(Opcode opcode) { + if (static_cast<size_t>(opcode) >= NAME_TABLE.size()) { + throw InvalidArgument("Invalid opcode with raw value {}", static_cast<int>(opcode)); + } + return NAME_TABLE[static_cast<size_t>(opcode)]; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.h b/src/shader_recompiler/frontend/maxwell/opcodes.h new file mode 100644 index 000000000..cd574f29d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/opcodes.h @@ -0,0 +1,30 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <fmt/format.h> + +namespace Shader::Maxwell { + +enum class Opcode { +#define INST(name, cute, encode) name, +#include "maxwell.inc" +#undef INST +}; + +const char* NameOf(Opcode opcode); + +} // namespace Shader::Maxwell + +template <> +struct fmt::formatter<Shader::Maxwell::Opcode> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::Maxwell::Opcode& opcode, FormatContext& ctx) { + return format_to(ctx.out(), "{}", NameOf(opcode)); + } +}; diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp new file mode 100644 index 000000000..8b3e0a15c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -0,0 +1,883 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
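+// Structurizes the control flow graph: BuildTree emits a label, the block's code and explicit
+// goto statements for every flow block, then GotoPass removes each goto with outward/inward
+// movement and lifting transformations (essentially Erosa & Hendren style goto elimination)
+// until it is a sibling of its label, where it collapses into an if (forward branch) or a
+// do-while loop (backward branch). TranslatePass finally lowers the statement tree into the
+// IR abstract syntax list.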
+ +#include <algorithm> +#include <memory> +#include <string> +#include <unordered_map> +#include <utility> +#include <vector> +#include <version> + +#include <fmt/format.h> + +#include <boost/intrusive/list.hpp> + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/maxwell/decode.h" +#include "shader_recompiler/frontend/maxwell/structured_control_flow.h" +#include "shader_recompiler/frontend/maxwell/translate/translate.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::Maxwell { +namespace { +struct Statement; + +// Use normal_link because we are not guaranteed to destroy the tree in order +using ListBaseHook = + boost::intrusive::list_base_hook<boost::intrusive::link_mode<boost::intrusive::normal_link>>; + +using Tree = boost::intrusive::list<Statement, + // Allow using Statement without a definition + boost::intrusive::base_hook<ListBaseHook>, + // Avoid linear complexity on splice, size is never called + boost::intrusive::constant_time_size<false>>; +using Node = Tree::iterator; + +enum class StatementType { + Code, + Goto, + Label, + If, + Loop, + Break, + Return, + Kill, + Unreachable, + Function, + Identity, + Not, + Or, + SetVariable, + SetIndirectBranchVariable, + Variable, + IndirectBranchCond, +}; + +bool HasChildren(StatementType type) { + switch (type) { + case StatementType::If: + case StatementType::Loop: + case StatementType::Function: + return true; + default: + return false; + } +} + +struct Goto {}; +struct Label {}; +struct If {}; +struct Loop {}; +struct Break {}; +struct Return {}; +struct Kill {}; +struct Unreachable {}; +struct FunctionTag {}; +struct Identity {}; +struct Not {}; +struct Or {}; +struct SetVariable {}; +struct SetIndirectBranchVariable {}; +struct Variable {}; +struct IndirectBranchCond {}; + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 26495) // Always initialize a member variable, expected in Statement +#endif +struct Statement : ListBaseHook { + Statement(const Flow::Block* block_, Statement* up_) + : block{block_}, up{up_}, type{StatementType::Code} {} + Statement(Goto, Statement* cond_, Node label_, Statement* up_) + : label{label_}, cond{cond_}, up{up_}, type{StatementType::Goto} {} + Statement(Label, u32 id_, Statement* up_) : id{id_}, up{up_}, type{StatementType::Label} {} + Statement(If, Statement* cond_, Tree&& children_, Statement* up_) + : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::If} {} + Statement(Loop, Statement* cond_, Tree&& children_, Statement* up_) + : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::Loop} {} + Statement(Break, Statement* cond_, Statement* up_) + : cond{cond_}, up{up_}, type{StatementType::Break} {} + Statement(Return, Statement* up_) : up{up_}, type{StatementType::Return} {} + Statement(Kill, Statement* up_) : up{up_}, type{StatementType::Kill} {} + Statement(Unreachable, Statement* up_) : up{up_}, type{StatementType::Unreachable} {} + Statement(FunctionTag) : children{}, type{StatementType::Function} {} + Statement(Identity, IR::Condition cond_, Statement* up_) + : guest_cond{cond_}, up{up_}, type{StatementType::Identity} {} + Statement(Not, Statement* op_, Statement* up_) : op{op_}, up{up_}, type{StatementType::Not} {} + Statement(Or, Statement* op_a_, Statement* op_b_, Statement* up_) + : op_a{op_a_}, op_b{op_b_}, up{up_}, type{StatementType::Or} {} + Statement(SetVariable, u32 
id_, Statement* op_, Statement* up_) + : op{op_}, id{id_}, up{up_}, type{StatementType::SetVariable} {} + Statement(SetIndirectBranchVariable, IR::Reg branch_reg_, s32 branch_offset_, Statement* up_) + : branch_offset{branch_offset_}, + branch_reg{branch_reg_}, up{up_}, type{StatementType::SetIndirectBranchVariable} {} + Statement(Variable, u32 id_, Statement* up_) + : id{id_}, up{up_}, type{StatementType::Variable} {} + Statement(IndirectBranchCond, u32 location_, Statement* up_) + : location{location_}, up{up_}, type{StatementType::IndirectBranchCond} {} + + ~Statement() { + if (HasChildren(type)) { + std::destroy_at(&children); + } + } + + union { + const Flow::Block* block; + Node label; + Tree children; + IR::Condition guest_cond; + Statement* op; + Statement* op_a; + u32 location; + s32 branch_offset; + }; + union { + Statement* cond; + Statement* op_b; + u32 id; + IR::Reg branch_reg; + }; + Statement* up{}; + StatementType type; +}; +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +std::string DumpExpr(const Statement* stmt) { + switch (stmt->type) { + case StatementType::Identity: + return fmt::format("{}", stmt->guest_cond); + case StatementType::Not: + return fmt::format("!{}", DumpExpr(stmt->op)); + case StatementType::Or: + return fmt::format("{} || {}", DumpExpr(stmt->op_a), DumpExpr(stmt->op_b)); + case StatementType::Variable: + return fmt::format("goto_L{}", stmt->id); + case StatementType::IndirectBranchCond: + return fmt::format("(indirect_branch == {:x})", stmt->location); + default: + return "<invalid type>"; + } +} + +[[maybe_unused]] std::string DumpTree(const Tree& tree, u32 indentation = 0) { + std::string ret; + std::string indent(indentation, ' '); + for (auto stmt = tree.begin(); stmt != tree.end(); ++stmt) { + switch (stmt->type) { + case StatementType::Code: + ret += fmt::format("{} Block {:04x} -> {:04x} (0x{:016x});\n", indent, + stmt->block->begin.Offset(), stmt->block->end.Offset(), + reinterpret_cast<uintptr_t>(stmt->block)); + break; + case StatementType::Goto: + ret += fmt::format("{} if ({}) goto L{};\n", indent, DumpExpr(stmt->cond), + stmt->label->id); + break; + case StatementType::Label: + ret += fmt::format("{}L{}:\n", indent, stmt->id); + break; + case StatementType::If: + ret += fmt::format("{} if ({}) {{\n", indent, DumpExpr(stmt->cond)); + ret += DumpTree(stmt->children, indentation + 4); + ret += fmt::format("{} }}\n", indent); + break; + case StatementType::Loop: + ret += fmt::format("{} do {{\n", indent); + ret += DumpTree(stmt->children, indentation + 4); + ret += fmt::format("{} }} while ({});\n", indent, DumpExpr(stmt->cond)); + break; + case StatementType::Break: + ret += fmt::format("{} if ({}) break;\n", indent, DumpExpr(stmt->cond)); + break; + case StatementType::Return: + ret += fmt::format("{} return;\n", indent); + break; + case StatementType::Kill: + ret += fmt::format("{} kill;\n", indent); + break; + case StatementType::Unreachable: + ret += fmt::format("{} unreachable;\n", indent); + break; + case StatementType::SetVariable: + ret += fmt::format("{} goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op)); + break; + case StatementType::SetIndirectBranchVariable: + ret += fmt::format("{} indirect_branch = {} + {};\n", indent, stmt->branch_reg, + stmt->branch_offset); + break; + case StatementType::Function: + case StatementType::Identity: + case StatementType::Not: + case StatementType::Or: + case StatementType::Variable: + case StatementType::IndirectBranchCond: + throw LogicError("Statement can't be printed"); + } + } + 
return ret; +} + +void SanitizeNoBreaks(const Tree& tree) { + if (std::ranges::find(tree, StatementType::Break, &Statement::type) != tree.end()) { + throw NotImplementedException("Capturing statement with break nodes"); + } +} + +size_t Level(Node stmt) { + size_t level{0}; + Statement* node{stmt->up}; + while (node) { + ++level; + node = node->up; + } + return level; +} + +bool IsDirectlyRelated(Node goto_stmt, Node label_stmt) { + const size_t goto_level{Level(goto_stmt)}; + const size_t label_level{Level(label_stmt)}; + size_t min_level; + size_t max_level; + Node min; + Node max; + if (label_level < goto_level) { + min_level = label_level; + max_level = goto_level; + min = label_stmt; + max = goto_stmt; + } else { // goto_level < label_level + min_level = goto_level; + max_level = label_level; + min = goto_stmt; + max = label_stmt; + } + while (max_level > min_level) { + --max_level; + max = max->up; + } + return min->up == max->up; +} + +bool IsIndirectlyRelated(Node goto_stmt, Node label_stmt) { + return goto_stmt->up != label_stmt->up && !IsDirectlyRelated(goto_stmt, label_stmt); +} + +[[maybe_unused]] bool AreSiblings(Node goto_stmt, Node label_stmt) noexcept { + Node it{goto_stmt}; + do { + if (it == label_stmt) { + return true; + } + --it; + } while (it != goto_stmt->up->children.begin()); + while (it != goto_stmt->up->children.end()) { + if (it == label_stmt) { + return true; + } + ++it; + } + return false; +} + +Node SiblingFromNephew(Node uncle, Node nephew) noexcept { + Statement* const parent{uncle->up}; + Statement* it{&*nephew}; + while (it->up != parent) { + it = it->up; + } + return Tree::s_iterator_to(*it); +} + +bool AreOrdered(Node left_sibling, Node right_sibling) noexcept { + const Node end{right_sibling->up->children.end()}; + for (auto it = right_sibling; it != end; ++it) { + if (it == left_sibling) { + return false; + } + } + return true; +} + +bool NeedsLift(Node goto_stmt, Node label_stmt) noexcept { + const Node sibling{SiblingFromNephew(goto_stmt, label_stmt)}; + return AreOrdered(sibling, goto_stmt); +} + +class GotoPass { +public: + explicit GotoPass(Flow::CFG& cfg, ObjectPool<Statement>& stmt_pool) : pool{stmt_pool} { + std::vector gotos{BuildTree(cfg)}; + const auto end{gotos.rend()}; + for (auto goto_stmt = gotos.rbegin(); goto_stmt != end; ++goto_stmt) { + RemoveGoto(*goto_stmt); + } + } + + Statement& RootStatement() noexcept { + return root_stmt; + } + +private: + void RemoveGoto(Node goto_stmt) { + // Force goto_stmt and label_stmt to be directly related + const Node label_stmt{goto_stmt->label}; + if (IsIndirectlyRelated(goto_stmt, label_stmt)) { + // Move goto_stmt out using outward-movement transformation until it becomes + // directly related to label_stmt + while (!IsDirectlyRelated(goto_stmt, label_stmt)) { + goto_stmt = MoveOutward(goto_stmt); + } + } + // Force goto_stmt and label_stmt to be siblings + if (IsDirectlyRelated(goto_stmt, label_stmt)) { + const size_t label_level{Level(label_stmt)}; + size_t goto_level{Level(goto_stmt)}; + if (goto_level > label_level) { + // Move goto_stmt out of its level using outward-movement transformations + while (goto_level > label_level) { + goto_stmt = MoveOutward(goto_stmt); + --goto_level; + } + } else { // Level(goto_stmt) < Level(label_stmt) + if (NeedsLift(goto_stmt, label_stmt)) { + // Lift goto_stmt to above stmt containing label_stmt using goto-lifting + // transformations + goto_stmt = Lift(goto_stmt); + } + // Move goto_stmt into label_stmt's level using inward-movement transformation + while 
(goto_level < label_level) { + goto_stmt = MoveInward(goto_stmt); + ++goto_level; + } + } + } + // Expensive operation: + // if (!AreSiblings(goto_stmt, label_stmt)) { + // throw LogicError("Goto is not a sibling with the label"); + // } + // goto_stmt and label_stmt are guaranteed to be siblings, eliminate + if (std::next(goto_stmt) == label_stmt) { + // Simply eliminate the goto if the label is next to it + goto_stmt->up->children.erase(goto_stmt); + } else if (AreOrdered(goto_stmt, label_stmt)) { + // Eliminate goto_stmt with a conditional + EliminateAsConditional(goto_stmt, label_stmt); + } else { + // Eliminate goto_stmt with a loop + EliminateAsLoop(goto_stmt, label_stmt); + } + } + + std::vector<Node> BuildTree(Flow::CFG& cfg) { + u32 label_id{0}; + std::vector<Node> gotos; + Flow::Function& first_function{cfg.Functions().front()}; + BuildTree(cfg, first_function, label_id, gotos, root_stmt.children.end(), std::nullopt); + return gotos; + } + + void BuildTree(Flow::CFG& cfg, Flow::Function& function, u32& label_id, + std::vector<Node>& gotos, Node function_insert_point, + std::optional<Node> return_label) { + Statement* const false_stmt{pool.Create(Identity{}, IR::Condition{false}, &root_stmt)}; + Tree& root{root_stmt.children}; + std::unordered_map<Flow::Block*, Node> local_labels; + local_labels.reserve(function.blocks.size()); + + for (Flow::Block& block : function.blocks) { + Statement* const label{pool.Create(Label{}, label_id, &root_stmt)}; + const Node label_it{root.insert(function_insert_point, *label)}; + local_labels.emplace(&block, label_it); + ++label_id; + } + for (Flow::Block& block : function.blocks) { + const Node label{local_labels.at(&block)}; + // Insertion point + const Node ip{std::next(label)}; + + // Reset goto variables before the first block and after its respective label + const auto make_reset_variable{[&]() -> Statement& { + return *pool.Create(SetVariable{}, label->id, false_stmt, &root_stmt); + }}; + root.push_front(make_reset_variable()); + root.insert(ip, make_reset_variable()); + root.insert(ip, *pool.Create(&block, &root_stmt)); + + switch (block.end_class) { + case Flow::EndClass::Branch: { + Statement* const always_cond{ + pool.Create(Identity{}, IR::Condition{true}, &root_stmt)}; + if (block.cond == IR::Condition{true}) { + const Node true_label{local_labels.at(block.branch_true)}; + gotos.push_back( + root.insert(ip, *pool.Create(Goto{}, always_cond, true_label, &root_stmt))); + } else if (block.cond == IR::Condition{false}) { + const Node false_label{local_labels.at(block.branch_false)}; + gotos.push_back(root.insert( + ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt))); + } else { + const Node true_label{local_labels.at(block.branch_true)}; + const Node false_label{local_labels.at(block.branch_false)}; + Statement* const true_cond{pool.Create(Identity{}, block.cond, &root_stmt)}; + gotos.push_back( + root.insert(ip, *pool.Create(Goto{}, true_cond, true_label, &root_stmt))); + gotos.push_back(root.insert( + ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt))); + } + break; + } + case Flow::EndClass::IndirectBranch: + root.insert(ip, *pool.Create(SetIndirectBranchVariable{}, block.branch_reg, + block.branch_offset, &root_stmt)); + for (const Flow::IndirectBranch& indirect : block.indirect_branches) { + const Node indirect_label{local_labels.at(indirect.block)}; + Statement* cond{ + pool.Create(IndirectBranchCond{}, indirect.address, &root_stmt)}; + Statement* goto_stmt{pool.Create(Goto{}, cond, indirect_label, 
&root_stmt)}; + gotos.push_back(root.insert(ip, *goto_stmt)); + } + root.insert(ip, *pool.Create(Unreachable{}, &root_stmt)); + break; + case Flow::EndClass::Call: { + Flow::Function& call{cfg.Functions()[block.function_call]}; + const Node call_return_label{local_labels.at(block.return_block)}; + BuildTree(cfg, call, label_id, gotos, ip, call_return_label); + break; + } + case Flow::EndClass::Exit: + root.insert(ip, *pool.Create(Return{}, &root_stmt)); + break; + case Flow::EndClass::Return: { + Statement* const always_cond{pool.Create(Identity{}, block.cond, &root_stmt)}; + auto goto_stmt{pool.Create(Goto{}, always_cond, return_label.value(), &root_stmt)}; + gotos.push_back(root.insert(ip, *goto_stmt)); + break; + } + case Flow::EndClass::Kill: + root.insert(ip, *pool.Create(Kill{}, &root_stmt)); + break; + } + } + } + + void UpdateTreeUp(Statement* tree) { + for (Statement& stmt : tree->children) { + stmt.up = tree; + } + } + + void EliminateAsConditional(Node goto_stmt, Node label_stmt) { + Tree& body{goto_stmt->up->children}; + Tree if_body; + if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_stmt); + Statement* const cond{pool.Create(Not{}, goto_stmt->cond, &root_stmt)}; + Statement* const if_stmt{pool.Create(If{}, cond, std::move(if_body), goto_stmt->up)}; + UpdateTreeUp(if_stmt); + body.insert(goto_stmt, *if_stmt); + body.erase(goto_stmt); + } + + void EliminateAsLoop(Node goto_stmt, Node label_stmt) { + Tree& body{goto_stmt->up->children}; + Tree loop_body; + loop_body.splice(loop_body.begin(), body, label_stmt, goto_stmt); + Statement* const cond{goto_stmt->cond}; + Statement* const loop{pool.Create(Loop{}, cond, std::move(loop_body), goto_stmt->up)}; + UpdateTreeUp(loop); + body.insert(goto_stmt, *loop); + body.erase(goto_stmt); + } + + [[nodiscard]] Node MoveOutward(Node goto_stmt) { + switch (goto_stmt->up->type) { + case StatementType::If: + return MoveOutwardIf(goto_stmt); + case StatementType::Loop: + return MoveOutwardLoop(goto_stmt); + default: + throw LogicError("Invalid outward movement"); + } + } + + [[nodiscard]] Node MoveInward(Node goto_stmt) { + Statement* const parent{goto_stmt->up}; + Tree& body{parent->children}; + const Node label{goto_stmt->label}; + const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)}; + const u32 label_id{label->id}; + + Statement* const goto_cond{goto_stmt->cond}; + Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)}; + body.insert(goto_stmt, *set_var); + + Tree if_body; + if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_nested_stmt); + Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)}; + Statement* const neg_var{pool.Create(Not{}, variable, &root_stmt)}; + if (!if_body.empty()) { + Statement* const if_stmt{pool.Create(If{}, neg_var, std::move(if_body), parent)}; + UpdateTreeUp(if_stmt); + body.insert(goto_stmt, *if_stmt); + } + body.erase(goto_stmt); + + switch (label_nested_stmt->type) { + case StatementType::If: + // Update nested if condition + label_nested_stmt->cond = + pool.Create(Or{}, variable, label_nested_stmt->cond, &root_stmt); + break; + case StatementType::Loop: + break; + default: + throw LogicError("Invalid inward movement"); + } + Tree& nested_tree{label_nested_stmt->children}; + Statement* const new_goto{pool.Create(Goto{}, variable, label, &*label_nested_stmt)}; + return nested_tree.insert(nested_tree.begin(), *new_goto); + } + + [[nodiscard]] Node Lift(Node goto_stmt) { + Statement* const parent{goto_stmt->up}; + Tree& 
body{parent->children}; + const Node label{goto_stmt->label}; + const u32 label_id{label->id}; + const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)}; + + Tree loop_body; + loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt); + SanitizeNoBreaks(loop_body); + Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)}; + Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)}; + UpdateTreeUp(loop_stmt); + body.insert(goto_stmt, *loop_stmt); + + Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)}; + loop_stmt->children.push_front(*new_goto); + const Node new_goto_node{loop_stmt->children.begin()}; + + Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_stmt->cond, loop_stmt)}; + loop_stmt->children.push_back(*set_var); + + body.erase(goto_stmt); + return new_goto_node; + } + + Node MoveOutwardIf(Node goto_stmt) { + const Node parent{Tree::s_iterator_to(*goto_stmt->up)}; + Tree& body{parent->children}; + const u32 label_id{goto_stmt->label->id}; + Statement* const goto_cond{goto_stmt->cond}; + Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, &*parent)}; + body.insert(goto_stmt, *set_goto_var); + + Tree if_body; + if_body.splice(if_body.begin(), body, std::next(goto_stmt), body.end()); + if_body.pop_front(); + Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)}; + Statement* const neg_cond{pool.Create(Not{}, cond, &root_stmt)}; + Statement* const if_stmt{pool.Create(If{}, neg_cond, std::move(if_body), &*parent)}; + UpdateTreeUp(if_stmt); + body.insert(goto_stmt, *if_stmt); + + body.erase(goto_stmt); + + Statement* const new_cond{pool.Create(Variable{}, label_id, &root_stmt)}; + Statement* const new_goto{pool.Create(Goto{}, new_cond, goto_stmt->label, parent->up)}; + Tree& parent_tree{parent->up->children}; + return parent_tree.insert(std::next(parent), *new_goto); + } + + Node MoveOutwardLoop(Node goto_stmt) { + Statement* const parent{goto_stmt->up}; + Tree& body{parent->children}; + const u32 label_id{goto_stmt->label->id}; + Statement* const goto_cond{goto_stmt->cond}; + Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)}; + Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)}; + Statement* const break_stmt{pool.Create(Break{}, cond, parent)}; + body.insert(goto_stmt, *set_goto_var); + body.insert(goto_stmt, *break_stmt); + body.erase(goto_stmt); + + const Node loop{Tree::s_iterator_to(*goto_stmt->up)}; + Statement* const new_goto_cond{pool.Create(Variable{}, label_id, &root_stmt)}; + Statement* const new_goto{pool.Create(Goto{}, new_goto_cond, goto_stmt->label, loop->up)}; + Tree& parent_tree{loop->up->children}; + return parent_tree.insert(std::next(loop), *new_goto); + } + + ObjectPool<Statement>& pool; + Statement root_stmt{FunctionTag{}}; +}; + +[[nodiscard]] Statement* TryFindForwardBlock(Statement& stmt) { + Tree& tree{stmt.up->children}; + const Node end{tree.end()}; + Node forward_node{std::next(Tree::s_iterator_to(stmt))}; + while (forward_node != end && !HasChildren(forward_node->type)) { + if (forward_node->type == StatementType::Code) { + return &*forward_node; + } + ++forward_node; + } + return nullptr; +} + +[[nodiscard]] IR::U1 VisitExpr(IR::IREmitter& ir, const Statement& stmt) { + switch (stmt.type) { + case StatementType::Identity: + return ir.Condition(stmt.guest_cond); + case StatementType::Not: + return ir.LogicalNot(IR::U1{VisitExpr(ir, *stmt.op)}); + case 
StatementType::Or: + return ir.LogicalOr(VisitExpr(ir, *stmt.op_a), VisitExpr(ir, *stmt.op_b)); + case StatementType::Variable: + return ir.GetGotoVariable(stmt.id); + case StatementType::IndirectBranchCond: + return ir.IEqual(ir.GetIndirectBranchVariable(), ir.Imm32(stmt.location)); + default: + throw NotImplementedException("Statement type {}", stmt.type); + } +} + +class TranslatePass { +public: + TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_, + ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt, + IR::AbstractSyntaxList& syntax_list_) + : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_}, + syntax_list{syntax_list_} { + Visit(root_stmt, nullptr, nullptr); + + IR::Block& first_block{*syntax_list.front().data.block}; + IR::IREmitter ir(first_block, first_block.begin()); + ir.Prologue(); + } + +private: + void Visit(Statement& parent, IR::Block* break_block, IR::Block* fallthrough_block) { + IR::Block* current_block{}; + const auto ensure_block{[&] { + if (current_block) { + return; + } + current_block = block_pool.Create(inst_pool); + auto& node{syntax_list.emplace_back()}; + node.type = IR::AbstractSyntaxNode::Type::Block; + node.data.block = current_block; + }}; + Tree& tree{parent.children}; + for (auto it = tree.begin(); it != tree.end(); ++it) { + Statement& stmt{*it}; + switch (stmt.type) { + case StatementType::Label: + // Labels can be ignored + break; + case StatementType::Code: { + ensure_block(); + Translate(env, current_block, stmt.block->begin.Offset(), stmt.block->end.Offset()); + break; + } + case StatementType::SetVariable: { + ensure_block(); + IR::IREmitter ir{*current_block}; + ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op)); + break; + } + case StatementType::SetIndirectBranchVariable: { + ensure_block(); + IR::IREmitter ir{*current_block}; + IR::U32 address{ir.IAdd(ir.GetReg(stmt.branch_reg), ir.Imm32(stmt.branch_offset))}; + ir.SetIndirectBranchVariable(address); + break; + } + case StatementType::If: { + ensure_block(); + IR::Block* const merge_block{MergeBlock(parent, stmt)}; + + // Implement if header block + IR::IREmitter ir{*current_block}; + const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; + + const size_t if_node_index{syntax_list.size()}; + syntax_list.emplace_back(); + + // Visit children + const size_t then_block_index{syntax_list.size()}; + Visit(stmt, break_block, merge_block); + + IR::Block* const then_block{syntax_list.at(then_block_index).data.block}; + current_block->AddBranch(then_block); + current_block->AddBranch(merge_block); + current_block = merge_block; + + auto& if_node{syntax_list[if_node_index]}; + if_node.type = IR::AbstractSyntaxNode::Type::If; + if_node.data.if_node.cond = cond; + if_node.data.if_node.body = then_block; + if_node.data.if_node.merge = merge_block; + + auto& endif_node{syntax_list.emplace_back()}; + endif_node.type = IR::AbstractSyntaxNode::Type::EndIf; + endif_node.data.end_if.merge = merge_block; + + auto& merge{syntax_list.emplace_back()}; + merge.type = IR::AbstractSyntaxNode::Type::Block; + merge.data.block = merge_block; + break; + } + case StatementType::Loop: { + IR::Block* const loop_header_block{block_pool.Create(inst_pool)}; + if (current_block) { + current_block->AddBranch(loop_header_block); + } + auto& header_node{syntax_list.emplace_back()}; + header_node.type = IR::AbstractSyntaxNode::Type::Block; + header_node.data.block = loop_header_block; + + IR::Block* const 
continue_block{block_pool.Create(inst_pool)}; + IR::Block* const merge_block{MergeBlock(parent, stmt)}; + + const size_t loop_node_index{syntax_list.size()}; + syntax_list.emplace_back(); + + // Visit children + const size_t body_block_index{syntax_list.size()}; + Visit(stmt, merge_block, continue_block); + + // The continue block is located at the end of the loop + IR::IREmitter ir{*continue_block}; + const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; + + IR::Block* const body_block{syntax_list.at(body_block_index).data.block}; + loop_header_block->AddBranch(body_block); + + continue_block->AddBranch(loop_header_block); + continue_block->AddBranch(merge_block); + + current_block = merge_block; + + auto& loop{syntax_list[loop_node_index]}; + loop.type = IR::AbstractSyntaxNode::Type::Loop; + loop.data.loop.body = body_block; + loop.data.loop.continue_block = continue_block; + loop.data.loop.merge = merge_block; + + auto& continue_block_node{syntax_list.emplace_back()}; + continue_block_node.type = IR::AbstractSyntaxNode::Type::Block; + continue_block_node.data.block = continue_block; + + auto& repeat{syntax_list.emplace_back()}; + repeat.type = IR::AbstractSyntaxNode::Type::Repeat; + repeat.data.repeat.cond = cond; + repeat.data.repeat.loop_header = loop_header_block; + repeat.data.repeat.merge = merge_block; + + auto& merge{syntax_list.emplace_back()}; + merge.type = IR::AbstractSyntaxNode::Type::Block; + merge.data.block = merge_block; + break; + } + case StatementType::Break: { + ensure_block(); + IR::Block* const skip_block{MergeBlock(parent, stmt)}; + + IR::IREmitter ir{*current_block}; + const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; + current_block->AddBranch(break_block); + current_block->AddBranch(skip_block); + current_block = skip_block; + + auto& break_node{syntax_list.emplace_back()}; + break_node.type = IR::AbstractSyntaxNode::Type::Break; + break_node.data.break_node.cond = cond; + break_node.data.break_node.merge = break_block; + break_node.data.break_node.skip = skip_block; + + auto& merge{syntax_list.emplace_back()}; + merge.type = IR::AbstractSyntaxNode::Type::Block; + merge.data.block = skip_block; + break; + } + case StatementType::Return: { + ensure_block(); + IR::IREmitter{*current_block}.Epilogue(); + current_block = nullptr; + syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return; + break; + } + case StatementType::Kill: { + ensure_block(); + IR::Block* demote_block{MergeBlock(parent, stmt)}; + IR::IREmitter{*current_block}.DemoteToHelperInvocation(); + current_block->AddBranch(demote_block); + current_block = demote_block; + + auto& merge{syntax_list.emplace_back()}; + merge.type = IR::AbstractSyntaxNode::Type::Block; + merge.data.block = demote_block; + break; + } + case StatementType::Unreachable: { + ensure_block(); + current_block = nullptr; + syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable; + break; + } + default: + throw NotImplementedException("Statement type {}", stmt.type); + } + } + if (current_block) { + if (fallthrough_block) { + current_block->AddBranch(fallthrough_block); + } else { + syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable; + } + } + } + + IR::Block* MergeBlock(Statement& parent, Statement& stmt) { + Statement* merge_stmt{TryFindForwardBlock(stmt)}; + if (!merge_stmt) { + // Create a merge block we can visit later + merge_stmt = stmt_pool.Create(&dummy_flow_block, &parent); + parent.children.insert(std::next(Tree::s_iterator_to(stmt)), 
*merge_stmt); + } + return block_pool.Create(inst_pool); + } + + ObjectPool<Statement>& stmt_pool; + ObjectPool<IR::Inst>& inst_pool; + ObjectPool<IR::Block>& block_pool; + Environment& env; + IR::AbstractSyntaxList& syntax_list; + +// TODO: C++20 Remove this when all compilers support constexpr std::vector +#if __cpp_lib_constexpr_vector >= 201907 + static constexpr Flow::Block dummy_flow_block; +#else + const Flow::Block dummy_flow_block; +#endif +}; +} // Anonymous namespace + +IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, + Environment& env, Flow::CFG& cfg) { + ObjectPool<Statement> stmt_pool{64}; + GotoPass goto_pass{cfg, stmt_pool}; + Statement& root{goto_pass.RootStatement()}; + IR::AbstractSyntaxList syntax_list; + TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list}; + return syntax_list; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h new file mode 100644 index 000000000..88b083649 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h @@ -0,0 +1,20 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/abstract_syntax_list.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::Maxwell { + +[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, + ObjectPool<IR::Block>& block_pool, Environment& env, + Flow::CFG& cfg); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp new file mode 100644 index 000000000..d9f999e05 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp @@ -0,0 +1,214 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
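// Editorial sketch (not part of the diff above): a minimal, standalone illustration of the
// node ordering TranslatePass emits for an `if` statement — the block holding the condition,
// an If node (filled in once the body has been visited), the body block(s), EndIf, and the
// merge block. The placeholder types below stand in for IR::AbstractSyntaxNode and IR::Block.
#include <cassert>
#include <vector>

namespace sketch {
enum class NodeType { Block, If, EndIf };

struct Node {
    NodeType type;
    int block_id{-1}; // stand-in for IR::Block*
};

std::vector<Node> EmitIf(int cond_block, int body_block, int merge_block) {
    std::vector<Node> list;
    list.push_back({NodeType::Block, cond_block});  // block that computed the condition
    list.push_back({NodeType::If});                 // placeholder, completed after the body visit
    list.push_back({NodeType::Block, body_block});  // then-body block(s)
    list.push_back({NodeType::EndIf});
    list.push_back({NodeType::Block, merge_block}); // execution resumes here
    return list;
}
} // namespace sketch

int main() {
    const std::vector<sketch::Node> list{sketch::EmitIf(0, 1, 2)};
    assert(list[1].type == sketch::NodeType::If);
    assert(list.back().block_id == 2);
}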
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class AtomOp : u64 { + ADD, + MIN, + MAX, + INC, + DEC, + AND, + OR, + XOR, + EXCH, + SAFEADD, +}; + +enum class AtomSize : u64 { + U32, + S32, + U64, + F32, + F16x2, + S64, +}; + +IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::U32U64& op_b, + AtomOp op, bool is_signed) { + switch (op) { + case AtomOp::ADD: + return ir.GlobalAtomicIAdd(offset, op_b); + case AtomOp::MIN: + return ir.GlobalAtomicIMin(offset, op_b, is_signed); + case AtomOp::MAX: + return ir.GlobalAtomicIMax(offset, op_b, is_signed); + case AtomOp::INC: + return ir.GlobalAtomicInc(offset, op_b); + case AtomOp::DEC: + return ir.GlobalAtomicDec(offset, op_b); + case AtomOp::AND: + return ir.GlobalAtomicAnd(offset, op_b); + case AtomOp::OR: + return ir.GlobalAtomicOr(offset, op_b); + case AtomOp::XOR: + return ir.GlobalAtomicXor(offset, op_b); + case AtomOp::EXCH: + return ir.GlobalAtomicExchange(offset, op_b); + default: + throw NotImplementedException("Integer Atom Operation {}", op); + } +} + +IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op, + AtomSize size) { + static constexpr IR::FpControl f16_control{ + .no_contraction = false, + .rounding = IR::FpRounding::RN, + .fmz_mode = IR::FmzMode::DontCare, + }; + static constexpr IR::FpControl f32_control{ + .no_contraction = false, + .rounding = IR::FpRounding::RN, + .fmz_mode = IR::FmzMode::FTZ, + }; + switch (op) { + case AtomOp::ADD: + return size == AtomSize::F32 ? ir.GlobalAtomicF32Add(offset, op_b, f32_control) + : ir.GlobalAtomicF16x2Add(offset, op_b, f16_control); + case AtomOp::MIN: + return ir.GlobalAtomicF16x2Min(offset, op_b, f16_control); + case AtomOp::MAX: + return ir.GlobalAtomicF16x2Max(offset, op_b, f16_control); + default: + throw NotImplementedException("FP Atom Operation {}", op); + } +} + +IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> addr_reg; + BitField<28, 20, s64> addr_offset; + BitField<28, 20, u64> rz_addr_offset; + BitField<48, 1, u64> e; + } const mem{insn}; + + const IR::U64 address{[&]() -> IR::U64 { + if (mem.e == 0) { + return v.ir.UConvert(64, v.X(mem.addr_reg)); + } + return v.L(mem.addr_reg); + }()}; + const u64 addr_offset{[&]() -> u64 { + if (mem.addr_reg == IR::Reg::RZ) { + // When RZ is used, the address is an absolute address + return static_cast<u64>(mem.rz_addr_offset.Value()); + } else { + return static_cast<u64>(mem.addr_offset.Value()); + } + }()}; + return v.ir.IAdd(address, v.ir.Imm64(addr_offset)); +} + +bool AtomOpNotApplicable(AtomSize size, AtomOp op) { + // TODO: SAFEADD + switch (size) { + case AtomSize::S32: + case AtomSize::U64: + return (op == AtomOp::INC || op == AtomOp::DEC); + case AtomSize::S64: + return !(op == AtomOp::MIN || op == AtomOp::MAX); + case AtomSize::F32: + return op != AtomOp::ADD; + case AtomSize::F16x2: + return !(op == AtomOp::ADD || op == AtomOp::MIN || op == AtomOp::MAX); + default: + return false; + } +} + +IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize size) { + switch (size) { + case AtomSize::U32: + case AtomSize::S32: + case AtomSize::F32: + case AtomSize::F16x2: + return ir.LoadGlobal32(offset); + case AtomSize::U64: + case AtomSize::S64: + return ir.PackUint2x32(ir.LoadGlobal64(offset)); + default: + throw NotImplementedException("Atom Size {}", 
size); + } +} + +void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) { + switch (size) { + case AtomSize::U32: + case AtomSize::S32: + case AtomSize::F16x2: + return v.X(dest_reg, IR::U32{result}); + case AtomSize::U64: + case AtomSize::S64: + return v.L(dest_reg, IR::U64{result}); + case AtomSize::F32: + return v.F(dest_reg, IR::F32{result}); + default: + break; + } +} + +IR::Value ApplyAtomOp(TranslatorVisitor& v, IR::Reg operand_reg, const IR::U64& offset, + AtomSize size, AtomOp op) { + switch (size) { + case AtomSize::U32: + case AtomSize::S32: + return ApplyIntegerAtomOp(v.ir, offset, v.X(operand_reg), op, size == AtomSize::S32); + case AtomSize::U64: + case AtomSize::S64: + return ApplyIntegerAtomOp(v.ir, offset, v.L(operand_reg), op, size == AtomSize::S64); + case AtomSize::F32: + return ApplyFpAtomOp(v.ir, offset, v.F(operand_reg), op, size); + case AtomSize::F16x2: { + return ApplyFpAtomOp(v.ir, offset, v.ir.UnpackFloat2x16(v.X(operand_reg)), op, size); + } + default: + throw NotImplementedException("Atom Size {}", size); + } +} + +void GlobalAtomic(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg, + const IR::U64& offset, AtomSize size, AtomOp op, bool write_dest) { + IR::Value result; + if (AtomOpNotApplicable(size, op)) { + result = LoadGlobal(v.ir, offset, size); + } else { + result = ApplyAtomOp(v, operand_reg, offset, size, op); + } + if (write_dest) { + StoreResult(v, dest_reg, result, size); + } +} +} // Anonymous namespace + +void TranslatorVisitor::ATOM(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<20, 8, IR::Reg> operand_reg; + BitField<49, 3, AtomSize> size; + BitField<52, 4, AtomOp> op; + } const atom{insn}; + const IR::U64 offset{AtomOffset(*this, insn)}; + GlobalAtomic(*this, atom.dest_reg, atom.operand_reg, offset, atom.size, atom.op, true); +} + +void TranslatorVisitor::RED(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> operand_reg; + BitField<20, 3, AtomSize> size; + BitField<23, 3, AtomOp> op; + } const red{insn}; + const IR::U64 offset{AtomOffset(*this, insn)}; + GlobalAtomic(*this, IR::Reg::RZ, red.operand_reg, offset, red.size, red.op, true); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp new file mode 100644 index 000000000..8b974621e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp @@ -0,0 +1,110 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
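// Editorial sketch (not part of the diff above): a standalone model of the effective-address
// rule AtomOffset() implements for ATOM/RED. The 20-bit immediate is sign-extended and added
// to the address register; when that register is RZ the immediate is taken as an absolute
// address instead. The E-bit (32- vs 64-bit address register) handling is omitted, and the
// helper names below are hypothetical, operating on plain integers rather than IR values.
#include <cassert>
#include <cstdint>

namespace sketch {
constexpr int kRZ = 255; // RZ, the always-zero register, modelled as index 255

uint64_t AtomOffset(int addr_reg, uint64_t reg_value, uint64_t insn) {
    const uint64_t raw = (insn >> 28) & 0xFFFFF; // 20-bit immediate at bit 28
    if (addr_reg == kRZ) {
        return raw; // absolute address
    }
    // Sign-extend the 20-bit immediate before adding it to the register value.
    const int64_t offset = static_cast<int64_t>(raw) - ((raw & 0x80000) ? (1ll << 20) : 0);
    return reg_value + static_cast<uint64_t>(offset);
}
} // namespace sketch

int main() {
    assert(sketch::AtomOffset(0, 0x1000, 0xFFFFFull << 28) == 0xFFF); // reg + (-1)
    assert(sketch::AtomOffset(sketch::kRZ, 0, 0x12345ull << 28) == 0x12345);
}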
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class AtomOp : u64 { + ADD, + MIN, + MAX, + INC, + DEC, + AND, + OR, + XOR, + EXCH, +}; + +enum class AtomsSize : u64 { + U32, + S32, + U64, +}; + +IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op, + bool is_signed) { + switch (op) { + case AtomOp::ADD: + return ir.SharedAtomicIAdd(offset, op_b); + case AtomOp::MIN: + return ir.SharedAtomicIMin(offset, op_b, is_signed); + case AtomOp::MAX: + return ir.SharedAtomicIMax(offset, op_b, is_signed); + case AtomOp::INC: + return ir.SharedAtomicInc(offset, op_b); + case AtomOp::DEC: + return ir.SharedAtomicDec(offset, op_b); + case AtomOp::AND: + return ir.SharedAtomicAnd(offset, op_b); + case AtomOp::OR: + return ir.SharedAtomicOr(offset, op_b); + case AtomOp::XOR: + return ir.SharedAtomicXor(offset, op_b); + case AtomOp::EXCH: + return ir.SharedAtomicExchange(offset, op_b); + default: + throw NotImplementedException("Integer Atoms Operation {}", op); + } +} + +IR::U32 AtomsOffset(TranslatorVisitor& v, u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> offset_reg; + BitField<30, 22, u64> absolute_offset; + BitField<30, 22, s64> relative_offset; + } const encoding{insn}; + + if (encoding.offset_reg == IR::Reg::RZ) { + return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset << 2)); + } else { + const s32 relative{static_cast<s32>(encoding.relative_offset << 2)}; + return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative)); + } +} + +void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) { + switch (size) { + case AtomsSize::U32: + case AtomsSize::S32: + return v.X(dest_reg, IR::U32{result}); + case AtomsSize::U64: + return v.L(dest_reg, IR::U64{result}); + default: + break; + } +} +} // Anonymous namespace + +void TranslatorVisitor::ATOMS(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> addr_reg; + BitField<20, 8, IR::Reg> src_reg_b; + BitField<28, 2, AtomsSize> size; + BitField<52, 4, AtomOp> op; + } const atoms{insn}; + + const bool size_64{atoms.size == AtomsSize::U64}; + if (size_64 && atoms.op != AtomOp::EXCH) { + throw NotImplementedException("64-bit Atoms Operation {}", atoms.op.Value()); + } + const bool is_signed{atoms.size == AtomsSize::S32}; + const IR::U32 offset{AtomsOffset(*this, insn)}; + + IR::Value result; + if (size_64) { + result = ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), atoms.op, is_signed); + } else { + result = ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), atoms.op, is_signed); + } + StoreResult(*this, atoms.dest_reg, result, atoms.size); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp new file mode 100644 index 000000000..fb3f00d3f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp @@ -0,0 +1,35 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
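// Editorial sketch (not part of the diff above): the ATOMS offset rule shown standalone. The
// 22-bit immediate addresses 32-bit words, so it is scaled by 4; with RZ as the offset
// register it is an absolute shared-memory offset, otherwise it is sign-extended and added
// to the register. Plain integers and a hypothetical helper stand in for the IR values.
#include <cassert>
#include <cstdint>

namespace sketch {
constexpr int kRZ = 255; // RZ, the always-zero register, modelled as index 255

uint32_t AtomsOffset(int offset_reg, uint32_t reg_value, uint64_t insn) {
    const uint32_t raw = static_cast<uint32_t>((insn >> 30) & 0x3FFFFF); // 22 bits at bit 30
    if (offset_reg == kRZ) {
        return raw << 2; // absolute shared-memory offset, scaled to bytes
    }
    // Sign-extend the 22-bit field, scale it to bytes, then add it to the register value.
    const int32_t displacement = static_cast<int32_t>(raw) - ((raw & 0x200000u) ? (1 << 22) : 0);
    return reg_value + static_cast<uint32_t>(displacement * 4);
}
} // namespace sketch

int main() {
    // An immediate of -1 words moves the register-relative offset back by 4 bytes.
    assert(sketch::AtomsOffset(0, 100, 0x3FFFFFull << 30) == 96);
    assert(sketch::AtomsOffset(sketch::kRZ, 0, 0x3FFFFFull << 30) == (0x3FFFFFu << 2));
}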
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { + +enum class BitSize : u64 { + B32, + B64, + B96, + B128, +}; + +void TranslatorVisitor::AL2P(u64 inst) { + union { + u64 raw; + BitField<0, 8, IR::Reg> result_register; + BitField<8, 8, IR::Reg> indexing_register; + BitField<20, 11, s64> offset; + BitField<47, 2, BitSize> bitsize; + } al2p{inst}; + if (al2p.bitsize != BitSize::B32) { + throw NotImplementedException("BitSize {}", al2p.bitsize.Value()); + } + const IR::U32 converted_offset{ir.Imm32(static_cast<u32>(al2p.offset.Value()))}; + const IR::U32 result{ir.IAdd(X(al2p.indexing_register), converted_offset)}; + X(al2p.result_register, result); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp new file mode 100644 index 000000000..86e433e41 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp @@ -0,0 +1,96 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +// Seems to be in CUDA terminology. +enum class LocalScope : u64 { + CTA, + GL, + SYS, + VC, +}; +} // Anonymous namespace + +void TranslatorVisitor::MEMBAR(u64 inst) { + union { + u64 raw; + BitField<8, 2, LocalScope> scope; + } const membar{inst}; + + if (membar.scope == LocalScope::CTA) { + ir.WorkgroupMemoryBarrier(); + } else { + ir.DeviceMemoryBarrier(); + } +} + +void TranslatorVisitor::DEPBAR() { + // DEPBAR is a no-op +} + +void TranslatorVisitor::BAR(u64 insn) { + enum class Mode { + RedPopc, + Scan, + RedAnd, + RedOr, + Sync, + Arrive, + }; + union { + u64 raw; + BitField<43, 1, u64> is_a_imm; + BitField<44, 1, u64> is_b_imm; + BitField<8, 8, u64> imm_a; + BitField<20, 12, u64> imm_b; + BitField<42, 1, u64> neg_pred; + BitField<39, 3, IR::Pred> pred; + } const bar{insn}; + + const Mode mode{[insn] { + switch (insn & 0x0000009B00000000ULL) { + case 0x0000000200000000ULL: + return Mode::RedPopc; + case 0x0000000300000000ULL: + return Mode::Scan; + case 0x0000000A00000000ULL: + return Mode::RedAnd; + case 0x0000001200000000ULL: + return Mode::RedOr; + case 0x0000008000000000ULL: + return Mode::Sync; + case 0x0000008100000000ULL: + return Mode::Arrive; + } + throw NotImplementedException("Invalid encoding"); + }()}; + if (mode != Mode::Sync) { + throw NotImplementedException("BAR mode {}", mode); + } + if (bar.is_a_imm == 0) { + throw NotImplementedException("Non-immediate input A"); + } + if (bar.imm_a != 0) { + throw NotImplementedException("Non-zero input A"); + } + if (bar.is_b_imm == 0) { + throw NotImplementedException("Non-immediate input B"); + } + if (bar.imm_b != 0) { + throw NotImplementedException("Non-zero input B"); + } + if (bar.pred != IR::Pred::PT && bar.neg_pred != 0) { + throw NotImplementedException("Non-true input predicate"); + } + ir.Barrier(); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp 
b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp new file mode 100644 index 000000000..9d5a87e52 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp @@ -0,0 +1,74 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void BFE(TranslatorVisitor& v, u64 insn, const IR::U32& src) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> offset_reg; + BitField<40, 1, u64> brev; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> is_signed; + } const bfe{insn}; + + const IR::U32 offset{v.ir.BitFieldExtract(src, v.ir.Imm32(0), v.ir.Imm32(8), false)}; + const IR::U32 count{v.ir.BitFieldExtract(src, v.ir.Imm32(8), v.ir.Imm32(8), false)}; + + // Common constants + const IR::U32 zero{v.ir.Imm32(0)}; + const IR::U32 one{v.ir.Imm32(1)}; + const IR::U32 max_size{v.ir.Imm32(32)}; + // Edge case conditions + const IR::U1 zero_count{v.ir.IEqual(count, zero)}; + const IR::U1 exceed_count{v.ir.IGreaterThanEqual(v.ir.IAdd(offset, count), max_size, false)}; + const IR::U1 replicate{v.ir.IGreaterThanEqual(offset, max_size, false)}; + + IR::U32 base{v.X(bfe.offset_reg)}; + if (bfe.brev != 0) { + base = v.ir.BitReverse(base); + } + IR::U32 result{v.ir.BitFieldExtract(base, offset, count, bfe.is_signed != 0)}; + if (bfe.is_signed != 0) { + const IR::U1 is_negative{v.ir.ILessThan(base, zero, true)}; + const IR::U32 replicated_bit{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)}; + const IR::U32 exceed_bit{v.ir.BitFieldExtract(base, v.ir.Imm32(31), one, false)}; + // Replicate condition + result = IR::U32{v.ir.Select(replicate, replicated_bit, result)}; + // Exceeding condition + const IR::U32 exceed_result{v.ir.BitFieldInsert(result, exceed_bit, v.ir.Imm32(31), one)}; + result = IR::U32{v.ir.Select(exceed_count, exceed_result, result)}; + } + // Zero count condition + result = IR::U32{v.ir.Select(zero_count, zero, result)}; + + v.X(bfe.dest_reg, result); + + if (bfe.cc != 0) { + v.SetZFlag(v.ir.IEqual(result, zero)); + v.SetSFlag(v.ir.ILessThan(result, zero, true)); + v.ResetCFlag(); + v.ResetOFlag(); + } +} +} // Anonymous namespace + +void TranslatorVisitor::BFE_reg(u64 insn) { + BFE(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::BFE_cbuf(u64 insn) { + BFE(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::BFE_imm(u64 insn) { + BFE(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp new file mode 100644 index 000000000..1e1ec2119 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp @@ -0,0 +1,62 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
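// Editorial sketch (not part of the diff above): a simplified model of the BFE operand
// packing handled in bitfield_extract.cpp — the second source packs the bit offset in bits
// [0,8) and the field width in bits [8,16), and a zero width always produces zero. The
// signed replication and offset/width overflow cases from the translator are not modelled
// here and are simply reported as zero; BfeUnsigned is a hypothetical standalone helper.
#include <cassert>
#include <cstdint>

namespace sketch {
uint32_t BfeUnsigned(uint32_t base, uint32_t packed_offset_count) {
    const uint32_t offset = packed_offset_count & 0xFF;
    const uint32_t count = (packed_offset_count >> 8) & 0xFF;
    if (count == 0 || offset >= 32) {
        return 0;
    }
    const uint64_t shifted = static_cast<uint64_t>(base) >> offset;
    const uint64_t mask = (count >= 32) ? ~0ull : ((1ull << count) - 1);
    return static_cast<uint32_t>(shifted & mask);
}
} // namespace sketch

int main() {
    // Extract 8 bits starting at bit 4 of 0xABCD1234 -> 0x23.
    assert(sketch::BfeUnsigned(0xABCD1234u, (8u << 8) | 4u) == 0x23u);
}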
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void BFI(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& base) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> insert_reg; + BitField<47, 1, u64> cc; + } const bfi{insn}; + + const IR::U32 zero{v.ir.Imm32(0)}; + const IR::U32 offset{v.ir.BitFieldExtract(src_a, zero, v.ir.Imm32(8), false)}; + const IR::U32 unsafe_count{v.ir.BitFieldExtract(src_a, v.ir.Imm32(8), v.ir.Imm32(8), false)}; + const IR::U32 max_size{v.ir.Imm32(32)}; + + // Edge case conditions + const IR::U1 exceed_offset{v.ir.IGreaterThanEqual(offset, max_size, false)}; + const IR::U1 exceed_count{v.ir.IGreaterThan(unsafe_count, max_size, false)}; + + const IR::U32 remaining_size{v.ir.ISub(max_size, offset)}; + const IR::U32 safe_count{v.ir.Select(exceed_count, remaining_size, unsafe_count)}; + + const IR::U32 insert{v.X(bfi.insert_reg)}; + IR::U32 result{v.ir.BitFieldInsert(base, insert, offset, safe_count)}; + + result = IR::U32{v.ir.Select(exceed_offset, base, result)}; + + v.X(bfi.dest_reg, result); + if (bfi.cc != 0) { + v.SetZFlag(v.ir.IEqual(result, zero)); + v.SetSFlag(v.ir.ILessThan(result, zero, true)); + v.ResetCFlag(); + v.ResetOFlag(); + } +} +} // Anonymous namespace + +void TranslatorVisitor::BFI_reg(u64 insn) { + BFI(*this, insn, GetReg20(insn), GetReg39(insn)); +} + +void TranslatorVisitor::BFI_rc(u64 insn) { + BFI(*this, insn, GetReg39(insn), GetCbuf(insn)); +} + +void TranslatorVisitor::BFI_cr(u64 insn) { + BFI(*this, insn, GetCbuf(insn), GetReg39(insn)); +} + +void TranslatorVisitor::BFI_imm(u64 insn) { + BFI(*this, insn, GetImm20(insn), GetReg39(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp new file mode 100644 index 000000000..371c0e0f7 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp @@ -0,0 +1,36 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void Check(u64 insn) { + union { + u64 raw; + BitField<5, 1, u64> cbuf_mode; + BitField<6, 1, u64> lmt; + } const encoding{insn}; + + if (encoding.cbuf_mode != 0) { + throw NotImplementedException("Constant buffer mode"); + } + if (encoding.lmt != 0) { + throw NotImplementedException("LMT"); + } +} +} // Anonymous namespace + +void TranslatorVisitor::BRX(u64 insn) { + Check(insn); +} + +void TranslatorVisitor::JMX(u64 insn) { + Check(insn); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h new file mode 100644 index 000000000..fd73f656c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h @@ -0,0 +1,57 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
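// Editorial sketch (not part of the diff above): a standalone model of the BFI clamping rule
// from bitfield_insert.cpp — the insert width is clamped to the bits remaining after the
// offset, and an offset of 32 or more leaves the base value untouched. Bfi is a hypothetical
// helper operating on plain integers, not an engine API.
#include <cassert>
#include <cstdint>

namespace sketch {
uint32_t Bfi(uint32_t base, uint32_t insert, uint32_t packed_offset_count) {
    const uint32_t offset = packed_offset_count & 0xFF;
    uint32_t count = (packed_offset_count >> 8) & 0xFF;
    if (offset >= 32) {
        return base; // nothing can be inserted past the top bit
    }
    if (count > 32) {
        count = 32 - offset; // clamp to the bits remaining after the offset
    }
    if (count == 0) {
        return base;
    }
    const uint64_t field = (1ull << count) - 1; // count <= 32 here, so this shift is safe
    const uint32_t mask = static_cast<uint32_t>((field << offset) & 0xFFFFFFFFull);
    return (base & ~mask) | ((insert << offset) & mask);
}
} // namespace sketch

int main() {
    // Insert the low 8 bits of 0xAB at bit offset 4 into 0x0000FFFF -> 0x0000FABF.
    assert(sketch::Bfi(0x0000FFFFu, 0xABu, (8u << 8) | 4u) == 0x0000FABFu);
}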
+ +#pragma once + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" + +namespace Shader::Maxwell { + +enum class FpRounding : u64 { + RN, + RM, + RP, + RZ, +}; + +enum class FmzMode : u64 { + None, + FTZ, + FMZ, + INVALIDFMZ3, +}; + +inline IR::FpRounding CastFpRounding(FpRounding fp_rounding) { + switch (fp_rounding) { + case FpRounding::RN: + return IR::FpRounding::RN; + case FpRounding::RM: + return IR::FpRounding::RM; + case FpRounding::RP: + return IR::FpRounding::RP; + case FpRounding::RZ: + return IR::FpRounding::RZ; + } + throw NotImplementedException("Invalid floating-point rounding {}", fp_rounding); +} + +inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) { + switch (fmz_mode) { + case FmzMode::None: + return IR::FmzMode::None; + case FmzMode::FTZ: + return IR::FmzMode::FTZ; + case FmzMode::FMZ: + // FMZ is manually handled in the instruction + return IR::FmzMode::FTZ; + case FmzMode::INVALIDFMZ3: + break; + } + throw NotImplementedException("Invalid FMZ mode {}", fmz_mode); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp new file mode 100644 index 000000000..20458d2ad --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp @@ -0,0 +1,153 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" + +namespace Shader::Maxwell { +IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, + CompareOp compare_op, bool is_signed) { + switch (compare_op) { + case CompareOp::False: + return ir.Imm1(false); + case CompareOp::LessThan: + return ir.ILessThan(operand_1, operand_2, is_signed); + case CompareOp::Equal: + return ir.IEqual(operand_1, operand_2); + case CompareOp::LessThanEqual: + return ir.ILessThanEqual(operand_1, operand_2, is_signed); + case CompareOp::GreaterThan: + return ir.IGreaterThan(operand_1, operand_2, is_signed); + case CompareOp::NotEqual: + return ir.INotEqual(operand_1, operand_2); + case CompareOp::GreaterThanEqual: + return ir.IGreaterThanEqual(operand_1, operand_2, is_signed); + case CompareOp::True: + return ir.Imm1(true); + default: + throw NotImplementedException("Invalid compare op {}", compare_op); + } +} + +IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, + CompareOp compare_op, bool is_signed) { + const IR::U32 zero{ir.Imm32(0)}; + const IR::U32 carry{ir.Select(ir.GetCFlag(), ir.Imm32(1), zero)}; + const IR::U1 z_flag{ir.GetZFlag()}; + const IR::U32 intermediate{ir.IAdd(ir.IAdd(operand_1, ir.BitwiseNot(operand_2)), carry)}; + const IR::U1 flip_logic{is_signed ? 
ir.Imm1(false) + : ir.LogicalXor(ir.ILessThan(operand_1, zero, true), + ir.ILessThan(operand_2, zero, true))}; + switch (compare_op) { + case CompareOp::False: + return ir.Imm1(false); + case CompareOp::LessThan: + return IR::U1{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), + ir.ILessThan(intermediate, zero, true))}; + case CompareOp::Equal: + return ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag); + case CompareOp::LessThanEqual: { + const IR::U1 base_cmp{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), + ir.ILessThan(intermediate, zero, true))}; + return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); + } + case CompareOp::GreaterThan: { + const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThanEqual(intermediate, zero, true), + ir.IGreaterThan(intermediate, zero, true))}; + const IR::U1 not_z{ir.LogicalNot(z_flag)}; + return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), not_z)); + } + case CompareOp::NotEqual: + return ir.LogicalOr(ir.INotEqual(intermediate, zero), + ir.LogicalAnd(ir.IEqual(intermediate, zero), ir.LogicalNot(z_flag))); + case CompareOp::GreaterThanEqual: { + const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThan(intermediate, zero, true), + ir.IGreaterThanEqual(intermediate, zero, true))}; + return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); + } + case CompareOp::True: + return ir.Imm1(true); + default: + throw NotImplementedException("Invalid compare op {}", compare_op); + } +} + +IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2, + BooleanOp bop) { + switch (bop) { + case BooleanOp::AND: + return ir.LogicalAnd(predicate_1, predicate_2); + case BooleanOp::OR: + return ir.LogicalOr(predicate_1, predicate_2); + case BooleanOp::XOR: + return ir.LogicalXor(predicate_1, predicate_2); + default: + throw NotImplementedException("Invalid bop {}", bop); + } +} + +IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op) { + switch (op) { + case PredicateOp::False: + return ir.Imm1(false); + case PredicateOp::True: + return ir.Imm1(true); + case PredicateOp::Zero: + return ir.IEqual(result, ir.Imm32(0)); + case PredicateOp::NonZero: + return ir.INotEqual(result, ir.Imm32(0)); + default: + throw NotImplementedException("Invalid Predicate operation {}", op); + } +} + +bool IsCompareOpOrdered(FPCompareOp op) { + switch (op) { + case FPCompareOp::LTU: + case FPCompareOp::EQU: + case FPCompareOp::LEU: + case FPCompareOp::GTU: + case FPCompareOp::NEU: + case FPCompareOp::GEU: + return false; + default: + return true; + } +} + +IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, + const IR::F16F32F64& operand_2, FPCompareOp compare_op, + IR::FpControl control) { + const bool ordered{IsCompareOpOrdered(compare_op)}; + switch (compare_op) { + case FPCompareOp::F: + return ir.Imm1(false); + case FPCompareOp::LT: + case FPCompareOp::LTU: + return ir.FPLessThan(operand_1, operand_2, control, ordered); + case FPCompareOp::EQ: + case FPCompareOp::EQU: + return ir.FPEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::LE: + case FPCompareOp::LEU: + return ir.FPLessThanEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::GT: + case FPCompareOp::GTU: + return ir.FPGreaterThan(operand_1, operand_2, control, ordered); + case FPCompareOp::NE: + case FPCompareOp::NEU: + return ir.FPNotEqual(operand_1, operand_2, control, ordered); + case 
FPCompareOp::GE: + case FPCompareOp::GEU: + return ir.FPGreaterThanEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::NUM: + return ir.FPOrdered(operand_1, operand_2); + case FPCompareOp::Nan: + return ir.FPUnordered(operand_1, operand_2); + case FPCompareOp::T: + return ir.Imm1(true); + default: + throw NotImplementedException("Invalid FP compare op {}", compare_op); + } +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h new file mode 100644 index 000000000..214d0af3c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h @@ -0,0 +1,28 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +[[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, + const IR::U32& operand_2, CompareOp compare_op, bool is_signed); + +[[nodiscard]] IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, + const IR::U32& operand_2, CompareOp compare_op, + bool is_signed); + +[[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, + const IR::U1& predicate_2, BooleanOp bop); + +[[nodiscard]] IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op); + +[[nodiscard]] bool IsCompareOpOrdered(FPCompareOp op); + +[[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, + const IR::F16F32F64& operand_2, FPCompareOp compare_op, + IR::FpControl control = {}); +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp new file mode 100644 index 000000000..420f2fb94 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp @@ -0,0 +1,66 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { + +void TranslatorVisitor::CSET(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 5, IR::FlowTest> cc_test; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<44, 1, u64> bf; + BitField<45, 2, BooleanOp> bop; + BitField<47, 1, u64> cc; + } const cset{insn}; + + const IR::U32 one_mask{ir.Imm32(-1)}; + const IR::U32 fp_one{ir.Imm32(0x3f800000)}; + const IR::U32 zero{ir.Imm32(0)}; + const IR::U32 pass_result{cset.bf == 0 ? 
one_mask : fp_one}; + const IR::U1 cc_test_result{ir.GetFlowTestResult(cset.cc_test)}; + const IR::U1 bop_pred{ir.GetPred(cset.bop_pred, cset.neg_bop_pred != 0)}; + const IR::U1 pred_result{PredicateCombine(ir, cc_test_result, bop_pred, cset.bop)}; + const IR::U32 result{ir.Select(pred_result, pass_result, zero)}; + X(cset.dest_reg, result); + if (cset.cc != 0) { + const IR::U1 is_zero{ir.IEqual(result, zero)}; + SetZFlag(is_zero); + if (cset.bf != 0) { + ResetSFlag(); + } else { + SetSFlag(ir.LogicalNot(is_zero)); + } + ResetOFlag(); + ResetCFlag(); + } +} + +void TranslatorVisitor::CSETP(u64 insn) { + union { + u64 raw; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<8, 5, IR::FlowTest> cc_test; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<45, 2, BooleanOp> bop; + } const csetp{insn}; + + const BooleanOp bop{csetp.bop}; + const IR::U1 bop_pred{ir.GetPred(csetp.bop_pred, csetp.neg_bop_pred != 0)}; + const IR::U1 cc_test_result{ir.GetFlowTestResult(csetp.cc_test)}; + const IR::U1 result_a{PredicateCombine(ir, cc_test_result, bop_pred, bop)}; + const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(cc_test_result), bop_pred, bop)}; + ir.SetPred(csetp.dest_pred_a, result_a); + ir.SetPred(csetp.dest_pred_b, result_b); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp new file mode 100644 index 000000000..5a1b3a8fc --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp @@ -0,0 +1,55 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
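// Editorial sketch (not part of the diff above): the result encoding CSET (and the other
// *SET instructions in this series) selects — with the BF bit clear the "true" value is an
// all-ones integer mask, with BF set it is the bit pattern of 1.0f, and "false" is always
// zero. SetResult is a hypothetical standalone helper, not an engine API.
#include <cassert>
#include <cstdint>

namespace sketch {
constexpr uint32_t SetResult(bool passed, bool boolean_float) {
    if (!passed) {
        return 0u;
    }
    return boolean_float ? 0x3f800000u /* 1.0f */ : 0xFFFFFFFFu /* all-ones mask */;
}
} // namespace sketch

int main() {
    static_assert(sketch::SetResult(true, false) == 0xFFFFFFFFu);
    static_assert(sketch::SetResult(true, true) == 0x3f800000u);
    assert(sketch::SetResult(false, true) == 0u);
}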
+ +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 2, FpRounding> fp_rounding; + BitField<45, 1, u64> neg_b; + BitField<46, 1, u64> abs_a; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_a; + BitField<49, 1, u64> abs_b; + } const dadd{insn}; + if (dadd.cc != 0) { + throw NotImplementedException("DADD CC"); + } + + const IR::F64 src_a{v.D(dadd.src_a_reg)}; + const IR::F64 op_a{v.ir.FPAbsNeg(src_a, dadd.abs_a != 0, dadd.neg_a != 0)}; + const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)}; + + const IR::FpControl control{ + .no_contraction = true, + .rounding = CastFpRounding(dadd.fp_rounding), + .fmz_mode = IR::FmzMode::None, + }; + + v.D(dadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); +} +} // Anonymous namespace + +void TranslatorVisitor::DADD_reg(u64 insn) { + DADD(*this, insn, GetDoubleReg20(insn)); +} + +void TranslatorVisitor::DADD_cbuf(u64 insn) { + DADD(*this, insn, GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DADD_imm(u64 insn) { + DADD(*this, insn, GetDoubleImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp new file mode 100644 index 000000000..1173192e4 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp @@ -0,0 +1,72 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void DSET(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> negate_a; + BitField<44, 1, u64> abs_b; + BitField<45, 2, BooleanOp> bop; + BitField<47, 1, u64> cc; + BitField<48, 4, FPCompareOp> compare_op; + BitField<52, 1, u64> bf; + BitField<53, 1, u64> negate_b; + BitField<54, 1, u64> abs_a; + } const dset{insn}; + + const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dset.src_a_reg), dset.abs_a != 0, dset.negate_a != 0)}; + const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dset.abs_b != 0, dset.negate_b != 0)}; + + IR::U1 pred{v.ir.GetPred(dset.pred)}; + if (dset.neg_pred != 0) { + pred = v.ir.LogicalNot(pred); + } + const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, dset.compare_op)}; + const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, dset.bop)}; + + const IR::U32 one_mask{v.ir.Imm32(-1)}; + const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; + const IR::U32 zero{v.ir.Imm32(0)}; + const IR::U32 pass_result{dset.bf == 0 ? 
one_mask : fp_one}; + const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)}; + + v.X(dset.dest_reg, result); + if (dset.cc != 0) { + const IR::U1 is_zero{v.ir.IEqual(result, zero)}; + v.SetZFlag(is_zero); + if (dset.bf != 0) { + v.ResetSFlag(); + } else { + v.SetSFlag(v.ir.LogicalNot(is_zero)); + } + v.ResetCFlag(); + v.ResetOFlag(); + } +} +} // Anonymous namespace + +void TranslatorVisitor::DSET_reg(u64 insn) { + DSET(*this, insn, GetDoubleReg20(insn)); +} + +void TranslatorVisitor::DSET_cbuf(u64 insn) { + DSET(*this, insn, GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DSET_imm(u64 insn) { + DSET(*this, insn, GetDoubleImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp new file mode 100644 index 000000000..f66097014 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp @@ -0,0 +1,58 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& src_c) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<50, 2, FpRounding> fp_rounding; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_b; + BitField<49, 1, u64> neg_c; + } const dfma{insn}; + + if (dfma.cc != 0) { + throw NotImplementedException("DFMA CC"); + } + + const IR::F64 src_a{v.D(dfma.src_a_reg)}; + const IR::F64 op_b{v.ir.FPAbsNeg(src_b, false, dfma.neg_b != 0)}; + const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)}; + + const IR::FpControl control{ + .no_contraction = true, + .rounding = CastFpRounding(dfma.fp_rounding), + .fmz_mode = IR::FmzMode::None, + }; + + v.D(dfma.dest_reg, v.ir.FPFma(src_a, op_b, op_c, control)); +} +} // Anonymous namespace + +void TranslatorVisitor::DFMA_reg(u64 insn) { + DFMA(*this, insn, GetDoubleReg20(insn), GetDoubleReg39(insn)); +} + +void TranslatorVisitor::DFMA_cr(u64 insn) { + DFMA(*this, insn, GetDoubleCbuf(insn), GetDoubleReg39(insn)); +} + +void TranslatorVisitor::DFMA_rc(u64 insn) { + DFMA(*this, insn, GetDoubleReg39(insn), GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DFMA_imm(u64 insn) { + DFMA(*this, insn, GetDoubleImm20(insn), GetDoubleReg39(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp new file mode 100644 index 000000000..6b551847c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp @@ -0,0 +1,55 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
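// Editorial sketch (not part of the diff above): the operand handling DFMA applies, shown
// with plain doubles — only sources B and C carry negate bits in this encoding, and the
// operation is a single-rounding fused multiply-add. The IR rounding mode and contraction
// controls are not modelled; Dfma is a hypothetical helper.
#include <cassert>
#include <cmath>

namespace sketch {
double Dfma(double a, double b, double c, bool neg_b, bool neg_c) {
    const double op_b = neg_b ? -b : b;
    const double op_c = neg_c ? -c : c;
    return std::fma(a, op_b, op_c); // fused: one rounding step, like the hardware FMA
}
} // namespace sketch

int main() {
    assert(sketch::Dfma(2.0, 3.0, 1.0, false, false) == 7.0);
    assert(sketch::Dfma(2.0, 3.0, 1.0, true, false) == -5.0);
}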
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void DMNMX(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<45, 1, u64> negate_b; + BitField<46, 1, u64> abs_a; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> negate_a; + BitField<49, 1, u64> abs_b; + } const dmnmx{insn}; + + if (dmnmx.cc != 0) { + throw NotImplementedException("DMNMX CC"); + } + + const IR::U1 pred{v.ir.GetPred(dmnmx.pred)}; + const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dmnmx.src_a_reg), dmnmx.abs_a != 0, dmnmx.negate_a != 0)}; + const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dmnmx.abs_b != 0, dmnmx.negate_b != 0)}; + + IR::F64 max{v.ir.FPMax(op_a, op_b)}; + IR::F64 min{v.ir.FPMin(op_a, op_b)}; + + if (dmnmx.neg_pred != 0) { + std::swap(min, max); + } + v.D(dmnmx.dest_reg, IR::F64{v.ir.Select(pred, min, max)}); +} +} // Anonymous namespace + +void TranslatorVisitor::DMNMX_reg(u64 insn) { + DMNMX(*this, insn, GetDoubleReg20(insn)); +} + +void TranslatorVisitor::DMNMX_cbuf(u64 insn) { + DMNMX(*this, insn, GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DMNMX_imm(u64 insn) { + DMNMX(*this, insn, GetDoubleImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp new file mode 100644 index 000000000..c0159fb65 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp @@ -0,0 +1,50 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
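// Editorial sketch (not part of the diff above): the selection DMNMX performs — the
// predicate picks between the minimum and the maximum of the two operands, and a negated
// predicate swaps which one is returned. NaN semantics of the real FPMin/FPMax are not
// modelled here; Dmnmx is a hypothetical helper over plain doubles.
#include <algorithm>
#include <cassert>
#include <utility>

namespace sketch {
double Dmnmx(double op_a, double op_b, bool pred, bool neg_pred) {
    double max = std::max(op_a, op_b);
    double min = std::min(op_a, op_b);
    if (neg_pred) {
        std::swap(min, max);
    }
    return pred ? min : max;
}
} // namespace sketch

int main() {
    assert(sketch::Dmnmx(1.0, 2.0, true, false) == 1.0);  // predicate true -> minimum
    assert(sketch::Dmnmx(1.0, 2.0, false, false) == 2.0); // predicate false -> maximum
}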
+ +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 2, FpRounding> fp_rounding; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg; + } const dmul{insn}; + + if (dmul.cc != 0) { + throw NotImplementedException("DMUL CC"); + } + + const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)}; + const IR::FpControl control{ + .no_contraction = true, + .rounding = CastFpRounding(dmul.fp_rounding), + .fmz_mode = IR::FmzMode::None, + }; + + v.D(dmul.dest_reg, v.ir.FPMul(src_a, src_b, control)); +} +} // Anonymous namespace + +void TranslatorVisitor::DMUL_reg(u64 insn) { + DMUL(*this, insn, GetDoubleReg20(insn)); +} + +void TranslatorVisitor::DMUL_cbuf(u64 insn) { + DMUL(*this, insn, GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DMUL_imm(u64 insn) { + DMUL(*this, insn, GetDoubleImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp new file mode 100644 index 000000000..b8e74ee44 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp @@ -0,0 +1,54 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void DSETP(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { + union { + u64 insn; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<6, 1, u64> negate_b; + BitField<7, 1, u64> abs_a; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<43, 1, u64> negate_a; + BitField<44, 1, u64> abs_b; + BitField<45, 2, BooleanOp> bop; + BitField<48, 4, FPCompareOp> compare_op; + } const dsetp{insn}; + + const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dsetp.src_a_reg), dsetp.abs_a != 0, dsetp.negate_a != 0)}; + const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dsetp.abs_b != 0, dsetp.negate_b != 0)}; + + const BooleanOp bop{dsetp.bop}; + const FPCompareOp compare_op{dsetp.compare_op}; + const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op)}; + const IR::U1 bop_pred{v.ir.GetPred(dsetp.bop_pred, dsetp.neg_bop_pred != 0)}; + const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; + const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; + v.ir.SetPred(dsetp.dest_pred_a, result_a); + v.ir.SetPred(dsetp.dest_pred_b, result_b); +} +} // Anonymous namespace + +void TranslatorVisitor::DSETP_reg(u64 insn) { + DSETP(*this, insn, GetDoubleReg20(insn)); +} + +void TranslatorVisitor::DSETP_cbuf(u64 insn) { + DSETP(*this, insn, GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DSETP_imm(u64 insn) { + DSETP(*this, insn, GetDoubleImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git 
a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp new file mode 100644 index 000000000..c2443c886 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp @@ -0,0 +1,43 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void ExitFragment(TranslatorVisitor& v) { + const ProgramHeader sph{v.env.SPH()}; + IR::Reg src_reg{IR::Reg::R0}; + for (u32 render_target = 0; render_target < 8; ++render_target) { + const std::array<bool, 4> mask{sph.ps.EnabledOutputComponents(render_target)}; + for (u32 component = 0; component < 4; ++component) { + if (!mask[component]) { + continue; + } + v.ir.SetFragColor(render_target, component, v.F(src_reg)); + ++src_reg; + } + } + if (sph.ps.omap.sample_mask != 0) { + v.ir.SetSampleMask(v.X(src_reg)); + } + if (sph.ps.omap.depth != 0) { + v.ir.SetFragDepth(v.F(src_reg + 1)); + } +} +} // Anonymous namespace + +void TranslatorVisitor::EXIT() { + switch (env.ShaderStage()) { + case Stage::Fragment: + ExitFragment(*this); + break; + default: + break; + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp new file mode 100644 index 000000000..f0cb25d61 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp @@ -0,0 +1,47 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FLO(TranslatorVisitor& v, u64 insn, IR::U32 src) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<40, 1, u64> tilde; + BitField<41, 1, u64> shift; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> is_signed; + } const flo{insn}; + + if (flo.cc != 0) { + throw NotImplementedException("CC"); + } + if (flo.tilde != 0) { + src = v.ir.BitwiseNot(src); + } + IR::U32 result{flo.is_signed != 0 ? v.ir.FindSMsb(src) : v.ir.FindUMsb(src)}; + if (flo.shift != 0) { + const IR::U1 not_found{v.ir.IEqual(result, v.ir.Imm32(-1))}; + result = IR::U32{v.ir.Select(not_found, result, v.ir.BitwiseXor(result, v.ir.Imm32(31)))}; + } + v.X(flo.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::FLO_reg(u64 insn) { + FLO(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::FLO_cbuf(u64 insn) { + FLO(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::FLO_imm(u64 insn) { + FLO(*this, insn, GetImm20(insn)); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp new file mode 100644 index 000000000..b8c89810c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp @@ -0,0 +1,82 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
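// Editorial sketch (not part of the diff above): the FLO result fixup shown standalone. The
// base operation yields the bit index of the most significant set bit (or -1 when none is
// set); with the SHIFT modifier that index is XORed with 31 to count from the top, while the
// -1 "not found" marker is passed through unchanged. FindMsb/Flo are hypothetical helpers.
#include <cassert>
#include <cstdint>

namespace sketch {
int32_t FindMsb(uint32_t value) {
    for (int32_t bit = 31; bit >= 0; --bit) {
        if ((value >> bit) & 1u) {
            return bit;
        }
    }
    return -1; // no bit set
}

int32_t Flo(uint32_t value, bool shift) {
    const int32_t msb = FindMsb(value);
    if (!shift || msb < 0) {
        return msb;
    }
    return msb ^ 31; // equivalent to the number of leading zero bits
}
} // namespace sketch

int main() {
    assert(sketch::Flo(0x00000001u, false) == 0);
    assert(sketch::Flo(0x00000001u, true) == 31); // 31 leading zeros
    assert(sketch::Flo(0u, true) == -1);          // not-found marker passes through
}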
+ +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding, + const IR::F32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const fadd{insn}; + + if (cc) { + throw NotImplementedException("FADD CC"); + } + const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fadd.src_a), abs_a, neg_a)}; + const IR::F32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)}; + IR::FpControl control{ + .no_contraction = true, + .rounding = CastFpRounding(fp_rounding), + .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), + }; + IR::F32 value{v.ir.FPAdd(op_a, op_b, control)}; + if (sat) { + value = v.ir.FPSaturate(value); + } + v.F(fadd.dest_reg, value); +} + +void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { + union { + u64 raw; + BitField<39, 2, FpRounding> fp_rounding; + BitField<44, 1, u64> ftz; + BitField<45, 1, u64> neg_b; + BitField<46, 1, u64> abs_a; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_a; + BitField<49, 1, u64> abs_b; + BitField<50, 1, u64> sat; + } const fadd{insn}; + + FADD(v, insn, fadd.sat != 0, fadd.cc != 0, fadd.ftz != 0, fadd.fp_rounding, src_b, + fadd.abs_a != 0, fadd.neg_a != 0, fadd.abs_b != 0, fadd.neg_b != 0); +} +} // Anonymous namespace + +void TranslatorVisitor::FADD_reg(u64 insn) { + FADD(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::FADD_cbuf(u64 insn) { + FADD(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FADD_imm(u64 insn) { + FADD(*this, insn, GetFloatImm20(insn)); +} + +void TranslatorVisitor::FADD32I(u64 insn) { + union { + u64 raw; + BitField<55, 1, u64> ftz; + BitField<56, 1, u64> neg_a; + BitField<54, 1, u64> abs_a; + BitField<52, 1, u64> cc; + BitField<53, 1, u64> neg_b; + BitField<57, 1, u64> abs_b; + } const fadd32i{insn}; + + FADD(*this, insn, false, fadd32i.cc != 0, fadd32i.ftz != 0, FpRounding::RN, GetFloatImm32(insn), + fadd32i.abs_a != 0, fadd32i.neg_a != 0, fadd32i.abs_b != 0, fadd32i.neg_b != 0); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp new file mode 100644 index 000000000..7127ebf54 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp @@ -0,0 +1,55 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& operand) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<47, 1, u64> ftz; + BitField<48, 4, FPCompareOp> compare_op; + } const fcmp{insn}; + + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::FpControl control{.fmz_mode = (fcmp.ftz != 0 ? 
IR::FmzMode::FTZ : IR::FmzMode::None)}; + const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)}; + const IR::U32 src_reg{v.X(fcmp.src_reg)}; + const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; + + v.X(fcmp.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::FCMP_reg(u64 insn) { + FCMP(*this, insn, GetReg20(insn), GetFloatReg39(insn)); +} + +void TranslatorVisitor::FCMP_rc(u64 insn) { + FCMP(*this, insn, GetReg39(insn), GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FCMP_cr(u64 insn) { + FCMP(*this, insn, GetCbuf(insn), GetFloatReg39(insn)); +} + +void TranslatorVisitor::FCMP_imm(u64 insn) { + union { + u64 raw; + BitField<20, 19, u64> value; + BitField<56, 1, u64> is_negative; + } const fcmp{insn}; + const u32 sign_bit{fcmp.is_negative != 0 ? (1U << 31) : 0}; + const u32 value{static_cast<u32>(fcmp.value) << 12}; + + FCMP(*this, insn, ir.Imm32(value | sign_bit), GetFloatReg39(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp new file mode 100644 index 000000000..eece4f28f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp @@ -0,0 +1,78 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> negate_a; + BitField<44, 1, u64> abs_b; + BitField<45, 2, BooleanOp> bop; + BitField<47, 1, u64> cc; + BitField<48, 4, FPCompareOp> compare_op; + BitField<52, 1, u64> bf; + BitField<53, 1, u64> negate_b; + BitField<54, 1, u64> abs_a; + BitField<55, 1, u64> ftz; + } const fset{insn}; + + const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fset.src_a_reg), fset.abs_a != 0, fset.negate_a != 0)}; + const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fset.abs_b != 0, fset.negate_b != 0); + const IR::FpControl control{ + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), + }; + + IR::U1 pred{v.ir.GetPred(fset.pred)}; + if (fset.neg_pred != 0) { + pred = v.ir.LogicalNot(pred); + } + const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, fset.compare_op, control)}; + const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, fset.bop)}; + + const IR::U32 one_mask{v.ir.Imm32(-1)}; + const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; + const IR::U32 zero{v.ir.Imm32(0)}; + const IR::U32 pass_result{fset.bf == 0 ? 
one_mask : fp_one}; + const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)}; + + v.X(fset.dest_reg, result); + if (fset.cc != 0) { + const IR::U1 is_zero{v.ir.IEqual(result, zero)}; + v.SetZFlag(is_zero); + if (fset.bf != 0) { + v.ResetSFlag(); + } else { + v.SetSFlag(v.ir.LogicalNot(is_zero)); + } + v.ResetCFlag(); + v.ResetOFlag(); + } +} +} // Anonymous namespace + +void TranslatorVisitor::FSET_reg(u64 insn) { + FSET(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::FSET_cbuf(u64 insn) { + FSET(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FSET_imm(u64 insn) { + FSET(*this, insn, GetFloatImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp new file mode 100644 index 000000000..02ab023c1 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp @@ -0,0 +1,214 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { +namespace { +enum class FloatFormat : u64 { + F16 = 1, + F32 = 2, + F64 = 3, +}; + +enum class RoundingOp : u64 { + None = 0, + Pass = 3, + Round = 8, + Floor = 9, + Ceil = 10, + Trunc = 11, +}; + +[[nodiscard]] u32 WidthSize(FloatFormat width) { + switch (width) { + case FloatFormat::F16: + return 16; + case FloatFormat::F32: + return 32; + case FloatFormat::F64: + return 64; + default: + throw NotImplementedException("Invalid width {}", width); + } +} + +void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<44, 1, u64> ftz; + BitField<45, 1, u64> neg; + BitField<47, 1, u64> cc; + BitField<50, 1, u64> sat; + BitField<39, 4, u64> rounding_op; + BitField<39, 2, FpRounding> rounding; + BitField<10, 2, FloatFormat> src_size; + BitField<8, 2, FloatFormat> dst_size; + + [[nodiscard]] RoundingOp RoundingOperation() const { + constexpr u64 rounding_mask = 0x0B; + return static_cast<RoundingOp>(rounding_op.Value() & rounding_mask); + } + } const f2f{insn}; + + if (f2f.cc != 0) { + throw NotImplementedException("F2F CC"); + } + + IR::F16F32F64 input{v.ir.FPAbsNeg(src_a, abs, f2f.neg != 0)}; + + const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64}; + IR::FpControl fp_control{ + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (f2f.ftz != 0 && !any_fp64 ? 
IR::FmzMode::FTZ : IR::FmzMode::None), + }; + if (f2f.src_size != f2f.dst_size) { + fp_control.rounding = CastFpRounding(f2f.rounding); + input = v.ir.FPConvert(WidthSize(f2f.dst_size), input, fp_control); + } else { + switch (f2f.RoundingOperation()) { + case RoundingOp::None: + case RoundingOp::Pass: + // Make sure NANs are handled properly + switch (f2f.src_size) { + case FloatFormat::F16: + input = v.ir.FPAdd(input, v.ir.FPConvert(16, v.ir.Imm32(0.0f)), fp_control); + break; + case FloatFormat::F32: + input = v.ir.FPAdd(input, v.ir.Imm32(0.0f), fp_control); + break; + case FloatFormat::F64: + input = v.ir.FPAdd(input, v.ir.Imm64(0.0), fp_control); + break; + } + break; + case RoundingOp::Round: + input = v.ir.FPRoundEven(input, fp_control); + break; + case RoundingOp::Floor: + input = v.ir.FPFloor(input, fp_control); + break; + case RoundingOp::Ceil: + input = v.ir.FPCeil(input, fp_control); + break; + case RoundingOp::Trunc: + input = v.ir.FPTrunc(input, fp_control); + break; + default: + throw NotImplementedException("Unimplemented rounding mode {}", f2f.rounding.Value()); + } + } + if (f2f.sat != 0 && !any_fp64) { + input = v.ir.FPSaturate(input); + } + + switch (f2f.dst_size) { + case FloatFormat::F16: { + const IR::F16 imm{v.ir.FPConvert(16, v.ir.Imm32(0.0f))}; + v.X(f2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(input, imm))); + break; + } + case FloatFormat::F32: + v.F(f2f.dest_reg, input); + break; + case FloatFormat::F64: + v.D(f2f.dest_reg, input); + break; + default: + throw NotImplementedException("Invalid dest format {}", f2f.dst_size.Value()); + } +} +} // Anonymous namespace + +void TranslatorVisitor::F2F_reg(u64 insn) { + union { + u64 insn; + BitField<49, 1, u64> abs; + BitField<10, 2, FloatFormat> src_size; + BitField<41, 1, u64> selector; + } const f2f{insn}; + + IR::F16F32F64 src_a; + switch (f2f.src_size) { + case FloatFormat::F16: { + auto [lhs_a, rhs_a]{Extract(ir, GetReg20(insn), Swizzle::H1_H0)}; + src_a = f2f.selector != 0 ? rhs_a : lhs_a; + break; + } + case FloatFormat::F32: + src_a = GetFloatReg20(insn); + break; + case FloatFormat::F64: + src_a = GetDoubleReg20(insn); + break; + default: + throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value()); + } + F2F(*this, insn, src_a, f2f.abs != 0); +} + +void TranslatorVisitor::F2F_cbuf(u64 insn) { + union { + u64 insn; + BitField<49, 1, u64> abs; + BitField<10, 2, FloatFormat> src_size; + BitField<41, 1, u64> selector; + } const f2f{insn}; + + IR::F16F32F64 src_a; + switch (f2f.src_size) { + case FloatFormat::F16: { + auto [lhs_a, rhs_a]{Extract(ir, GetCbuf(insn), Swizzle::H1_H0)}; + src_a = f2f.selector != 0 ? rhs_a : lhs_a; + break; + } + case FloatFormat::F32: + src_a = GetFloatCbuf(insn); + break; + case FloatFormat::F64: + src_a = GetDoubleCbuf(insn); + break; + default: + throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value()); + } + F2F(*this, insn, src_a, f2f.abs != 0); +} + +void TranslatorVisitor::F2F_imm([[maybe_unused]] u64 insn) { + union { + u64 insn; + BitField<49, 1, u64> abs; + BitField<10, 2, FloatFormat> src_size; + BitField<41, 1, u64> selector; + BitField<20, 19, u64> imm; + BitField<56, 1, u64> imm_neg; + } const f2f{insn}; + + IR::F16F32F64 src_a; + switch (f2f.src_size) { + case FloatFormat::F16: { + const u32 imm{static_cast<u32>(f2f.imm & 0x0000ffff)}; + const IR::Value vector{ir.UnpackFloat2x16(ir.Imm32(imm | (imm << 16)))}; + src_a = IR::F16{ir.CompositeExtract(vector, f2f.selector != 0 ? 
0 : 1)}; + if (f2f.imm_neg != 0) { + throw NotImplementedException("Neg bit on F16"); + } + break; + } + case FloatFormat::F32: + src_a = GetFloatImm20(insn); + break; + case FloatFormat::F64: + src_a = GetDoubleImm20(insn); + break; + default: + throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value()); + } + F2F(*this, insn, src_a, f2f.abs != 0); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp new file mode 100644 index 000000000..92b1ce015 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -0,0 +1,253 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <limits> + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class DestFormat : u64 { + Invalid, + I16, + I32, + I64, +}; +enum class SrcFormat : u64 { + Invalid, + F16, + F32, + F64, +}; +enum class Rounding : u64 { + Round, + Floor, + Ceil, + Trunc, +}; + +union F2I { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 2, DestFormat> dest_format; + BitField<10, 2, SrcFormat> src_format; + BitField<12, 1, u64> is_signed; + BitField<39, 2, Rounding> rounding; + BitField<41, 1, u64> half; + BitField<44, 1, u64> ftz; + BitField<45, 1, u64> abs; + BitField<47, 1, u64> cc; + BitField<49, 1, u64> neg; +}; + +size_t BitSize(DestFormat dest_format) { + switch (dest_format) { + case DestFormat::I16: + return 16; + case DestFormat::I32: + return 32; + case DestFormat::I64: + return 64; + default: + throw NotImplementedException("Invalid destination format {}", dest_format); + } +} + +std::pair<f64, f64> ClampBounds(DestFormat format, bool is_signed) { + if (is_signed) { + switch (format) { + case DestFormat::I16: + return {static_cast<f64>(std::numeric_limits<s16>::max()), + static_cast<f64>(std::numeric_limits<s16>::min())}; + case DestFormat::I32: + return {static_cast<f64>(std::numeric_limits<s32>::max()), + static_cast<f64>(std::numeric_limits<s32>::min())}; + case DestFormat::I64: + return {static_cast<f64>(std::numeric_limits<s64>::max()), + static_cast<f64>(std::numeric_limits<s64>::min())}; + default: + break; + } + } else { + switch (format) { + case DestFormat::I16: + return {static_cast<f64>(std::numeric_limits<u16>::max()), + static_cast<f64>(std::numeric_limits<u16>::min())}; + case DestFormat::I32: + return {static_cast<f64>(std::numeric_limits<u32>::max()), + static_cast<f64>(std::numeric_limits<u32>::min())}; + case DestFormat::I64: + return {static_cast<f64>(std::numeric_limits<u64>::max()), + static_cast<f64>(std::numeric_limits<u64>::min())}; + default: + break; + } + } + throw NotImplementedException("Invalid destination format {}", format); +} + +IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) { + union { + u64 raw; + BitField<20, 14, s64> offset; + BitField<34, 5, u64> binding; + } const cbuf{insn}; + if (cbuf.binding >= 18) { + throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding); + } + if (cbuf.offset >= 0x4'000 || cbuf.offset < 0) { + throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset * 
4); + } + if (cbuf.offset % 2 != 0) { + throw NotImplementedException("Unaligned F64 constant buffer offset {}", cbuf.offset * 4); + } + const IR::U32 binding{v.ir.Imm32(static_cast<u32>(cbuf.binding))}; + const IR::U32 byte_offset{v.ir.Imm32(static_cast<u32>(cbuf.offset) * 4 + 4)}; + const IR::U32 cbuf_data{v.ir.GetCbuf(binding, byte_offset)}; + const IR::Value vector{v.ir.CompositeConstruct(v.ir.Imm32(0U), cbuf_data)}; + return v.ir.PackDouble2x32(vector); +} + +void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { + // F2I is used to convert from a floating point value to an integer + const F2I f2i{insn}; + + const bool denorm_cares{f2i.src_format != SrcFormat::F16 && f2i.src_format != SrcFormat::F64 && + f2i.dest_format != DestFormat::I64}; + IR::FmzMode fmz_mode{IR::FmzMode::DontCare}; + if (denorm_cares) { + fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None; + } + const IR::FpControl fp_control{ + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = fmz_mode, + }; + const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)}; + const IR::F16F32F64 rounded_value{[&] { + switch (f2i.rounding) { + case Rounding::Round: + return v.ir.FPRoundEven(op_a, fp_control); + case Rounding::Floor: + return v.ir.FPFloor(op_a, fp_control); + case Rounding::Ceil: + return v.ir.FPCeil(op_a, fp_control); + case Rounding::Trunc: + return v.ir.FPTrunc(op_a, fp_control); + default: + throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value()); + } + }()}; + const bool is_signed{f2i.is_signed != 0}; + const auto [max_bound, min_bound] = ClampBounds(f2i.dest_format, is_signed); + + IR::F16F32F64 intermediate; + switch (f2i.src_format) { + case SrcFormat::F16: { + const IR::F16 max_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(max_bound)))}; + const IR::F16 min_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(min_bound)))}; + intermediate = v.ir.FPClamp(rounded_value, min_val, max_val); + break; + } + case SrcFormat::F32: { + const IR::F32 max_val{v.ir.Imm32(static_cast<f32>(max_bound))}; + const IR::F32 min_val{v.ir.Imm32(static_cast<f32>(min_bound))}; + intermediate = v.ir.FPClamp(rounded_value, min_val, max_val); + break; + } + case SrcFormat::F64: { + const IR::F64 max_val{v.ir.Imm64(max_bound)}; + const IR::F64 min_val{v.ir.Imm64(min_bound)}; + intermediate = v.ir.FPClamp(rounded_value, min_val, max_val); + break; + } + default: + throw NotImplementedException("Invalid destination format {}", f2i.dest_format.Value()); + } + + const size_t bitsize{std::max<size_t>(32, BitSize(f2i.dest_format))}; + IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, intermediate)}; + + bool handled_special_case = false; + const bool special_nan_cases = + (f2i.src_format == SrcFormat::F64) != (f2i.dest_format == DestFormat::I64); + if (special_nan_cases) { + if (f2i.dest_format == DestFormat::I32) { + handled_special_case = true; + result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0x8000'0000U), result)}; + } else if (f2i.dest_format == DestFormat::I64) { + handled_special_case = true; + result = IR::U64{ + v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000UL), result)}; + } + } + if (!handled_special_case && is_signed) { + if (bitsize != 64) { + result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)}; + } else { + result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(u64{0}), result)}; + } + } + + if (bitsize == 64) { + v.L(f2i.dest_reg, result); + } else { + 
v.X(f2i.dest_reg, result); + } + + if (f2i.cc != 0) { + throw NotImplementedException("F2I CC"); + } +} +} // Anonymous namespace + +void TranslatorVisitor::F2I_reg(u64 insn) { + union { + u64 raw; + F2I base; + BitField<20, 8, IR::Reg> src_reg; + } const f2i{insn}; + + const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 { + switch (f2i.base.src_format) { + case SrcFormat::F16: + return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(X(f2i.src_reg)), f2i.base.half)}; + case SrcFormat::F32: + return F(f2i.src_reg); + case SrcFormat::F64: + return ir.PackDouble2x32(ir.CompositeConstruct(X(f2i.src_reg), X(f2i.src_reg + 1))); + default: + throw NotImplementedException("Invalid F2I source format {}", + f2i.base.src_format.Value()); + } + }()}; + TranslateF2I(*this, insn, op_a); +} + +void TranslatorVisitor::F2I_cbuf(u64 insn) { + const F2I f2i{insn}; + const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 { + switch (f2i.src_format) { + case SrcFormat::F16: + return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(GetCbuf(insn)), f2i.half)}; + case SrcFormat::F32: + return GetFloatCbuf(insn); + case SrcFormat::F64: { + return UnpackCbuf(*this, insn); + } + default: + throw NotImplementedException("Invalid F2I source format {}", f2i.src_format.Value()); + } + }()}; + TranslateF2I(*this, insn, op_a); +} + +void TranslatorVisitor::F2I_imm(u64) { + throw NotImplementedException("{}", Opcode::F2I_imm); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp new file mode 100644 index 000000000..fa2a7807b --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp @@ -0,0 +1,94 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c, bool neg_a, + bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const ffma{insn}; + + if (cc) { + throw NotImplementedException("FFMA CC"); + } + const IR::F32 op_a{v.ir.FPAbsNeg(v.F(ffma.src_a), false, neg_a)}; + const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; + const IR::F32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)}; + const IR::FpControl fp_control{ + .no_contraction = true, + .rounding = CastFpRounding(fp_rounding), + .fmz_mode = CastFmzMode(fmz_mode), + }; + IR::F32 value{v.ir.FPFma(op_a, op_b, op_c, fp_control)}; + if (fmz_mode == FmzMode::FMZ && !sat) { + // Do not implement FMZ if SAT is enabled, as it does the logic for us. 
+ // On D3D9 mode, anything * 0 is zero, even NAN and infinity + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)}; + const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)}; + const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)}; + value = IR::F32{v.ir.Select(any_zero, op_c, value)}; + } + if (sat) { + value = v.ir.FPSaturate(value); + } + v.F(ffma.dest_reg, value); +} + +void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c) { + union { + u64 raw; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_b; + BitField<49, 1, u64> neg_c; + BitField<50, 1, u64> sat; + BitField<51, 2, FpRounding> fp_rounding; + BitField<53, 2, FmzMode> fmz_mode; + } const ffma{insn}; + + FFMA(v, insn, src_b, src_c, false, ffma.neg_b != 0, ffma.neg_c != 0, ffma.sat != 0, + ffma.cc != 0, ffma.fmz_mode, ffma.fp_rounding); +} +} // Anonymous namespace + +void TranslatorVisitor::FFMA_reg(u64 insn) { + FFMA(*this, insn, GetFloatReg20(insn), GetFloatReg39(insn)); +} + +void TranslatorVisitor::FFMA_rc(u64 insn) { + FFMA(*this, insn, GetFloatReg39(insn), GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FFMA_cr(u64 insn) { + FFMA(*this, insn, GetFloatCbuf(insn), GetFloatReg39(insn)); +} + +void TranslatorVisitor::FFMA_imm(u64 insn) { + FFMA(*this, insn, GetFloatImm20(insn), GetFloatReg39(insn)); +} + +void TranslatorVisitor::FFMA32I(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> src_c; // FFMA32I mirrors the destination and addition register + BitField<52, 1, u64> cc; + BitField<53, 2, FmzMode> fmz_mode; + BitField<55, 1, u64> sat; + BitField<56, 1, u64> neg_a; + BitField<57, 1, u64> neg_c; + } const ffma32i{insn}; + + FFMA(*this, insn, GetFloatImm32(insn), F(ffma32i.src_c), ffma32i.neg_a != 0, false, + ffma32i.neg_c != 0, ffma32i.sat != 0, ffma32i.cc != 0, ffma32i.fmz_mode, FpRounding::RN); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp new file mode 100644 index 000000000..c0d6ee5af --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp @@ -0,0 +1,62 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<44, 1, u64> ftz; + BitField<45, 1, u64> negate_b; + BitField<46, 1, u64> abs_a; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> negate_a; + BitField<49, 1, u64> abs_b; + } const fmnmx{insn}; + + if (fmnmx.cc) { + throw NotImplementedException("FMNMX CC"); + } + + const IR::U1 pred{v.ir.GetPred(fmnmx.pred)}; + const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fmnmx.src_a_reg), fmnmx.abs_a != 0, fmnmx.negate_a != 0)}; + const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)}; + + const IR::FpControl control{ + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (fmnmx.ftz != 0 ? 
IR::FmzMode::FTZ : IR::FmzMode::None), + }; + IR::F32 max{v.ir.FPMax(op_a, op_b, control)}; + IR::F32 min{v.ir.FPMin(op_a, op_b, control)}; + + if (fmnmx.neg_pred != 0) { + std::swap(min, max); + } + + v.F(fmnmx.dest_reg, IR::F32{v.ir.Select(pred, min, max)}); +} +} // Anonymous namespace + +void TranslatorVisitor::FMNMX_reg(u64 insn) { + FMNMX(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::FMNMX_cbuf(u64 insn) { + FMNMX(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FMNMX_imm(u64 insn) { + FMNMX(*this, insn, GetFloatImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp new file mode 100644 index 000000000..2f8605619 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp @@ -0,0 +1,71 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Operation : u64 { + Cos = 0, + Sin = 1, + Ex2 = 2, // Base 2 exponent + Lg2 = 3, // Base 2 logarithm + Rcp = 4, // Reciprocal + Rsq = 5, // Reciprocal square root + Rcp64H = 6, // 64-bit reciprocal + Rsq64H = 7, // 64-bit reciprocal square root + Sqrt = 8, +}; +} // Anonymous namespace + +void TranslatorVisitor::MUFU(u64 insn) { + // MUFU is used to implement a bunch of special functions. See Operation. + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<20, 4, Operation> operation; + BitField<46, 1, u64> abs; + BitField<48, 1, u64> neg; + BitField<50, 1, u64> sat; + } const mufu{insn}; + + const IR::F32 op_a{ir.FPAbsNeg(F(mufu.src_reg), mufu.abs != 0, mufu.neg != 0)}; + IR::F32 value{[&]() -> IR::F32 { + switch (mufu.operation) { + case Operation::Cos: + return ir.FPCos(op_a); + case Operation::Sin: + return ir.FPSin(op_a); + case Operation::Ex2: + return ir.FPExp2(op_a); + case Operation::Lg2: + return ir.FPLog2(op_a); + case Operation::Rcp: + return ir.FPRecip(op_a); + case Operation::Rsq: + return ir.FPRecipSqrt(op_a); + case Operation::Rcp64H: + throw NotImplementedException("MUFU.RCP64H"); + case Operation::Rsq64H: + throw NotImplementedException("MUFU.RSQ64H"); + case Operation::Sqrt: + return ir.FPSqrt(op_a); + default: + throw NotImplementedException("Invalid MUFU operation {}", mufu.operation.Value()); + } + }()}; + + if (mufu.sat) { + value = ir.FPSaturate(value); + } + + F(mufu.dest_reg, value); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp new file mode 100644 index 000000000..06226b7ce --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp @@ -0,0 +1,127 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
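+// FMUL/FMUL32I: single-precision multiply with an optional power-of-two pre-scale (1/8 through 8),
+// D3D9-style FMZ zero handling, and optional saturation of the result.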
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Scale : u64 { + None, + D2, + D4, + D8, + M8, + M4, + M2, + INVALIDSCALE37, +}; + +float ScaleFactor(Scale scale) { + switch (scale) { + case Scale::None: + return 1.0f; + case Scale::D2: + return 1.0f / 2.0f; + case Scale::D4: + return 1.0f / 4.0f; + case Scale::D8: + return 1.0f / 8.0f; + case Scale::M8: + return 8.0f; + case Scale::M4: + return 4.0f; + case Scale::M2: + return 2.0f; + case Scale::INVALIDSCALE37: + break; + } + throw NotImplementedException("Invalid FMUL scale {}", scale); +} + +void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode, + FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const fmul{insn}; + + if (cc) { + throw NotImplementedException("FMUL CC"); + } + IR::F32 op_a{v.F(fmul.src_a)}; + if (scale != Scale::None) { + if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) { + throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers"); + } + op_a = v.ir.FPMul(op_a, v.ir.Imm32(ScaleFactor(scale))); + } + const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; + const IR::FpControl fp_control{ + .no_contraction = true, + .rounding = CastFpRounding(fp_rounding), + .fmz_mode = CastFmzMode(fmz_mode), + }; + IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)}; + if (fmz_mode == FmzMode::FMZ && !sat) { + // Do not implement FMZ if SAT is enabled, as it does the logic for us. 
+ // On D3D9 mode, anything * 0 is zero, even NAN and infinity + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)}; + const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)}; + const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)}; + value = IR::F32{v.ir.Select(any_zero, zero, value)}; + } + if (sat) { + value = v.ir.FPSaturate(value); + } + v.F(fmul.dest_reg, value); +} + +void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { + union { + u64 raw; + BitField<39, 2, FpRounding> fp_rounding; + BitField<41, 3, Scale> scale; + BitField<44, 2, FmzMode> fmz; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_b; + BitField<50, 1, u64> sat; + } const fmul{insn}; + + FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0, + fmul.neg_b != 0); +} +} // Anonymous namespace + +void TranslatorVisitor::FMUL_reg(u64 insn) { + return FMUL(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::FMUL_cbuf(u64 insn) { + return FMUL(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FMUL_imm(u64 insn) { + return FMUL(*this, insn, GetFloatImm20(insn)); +} + +void TranslatorVisitor::FMUL32I(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> cc; + BitField<53, 2, FmzMode> fmz; + BitField<55, 1, u64> sat; + } const fmul32i{insn}; + + FMUL(*this, insn, GetFloatImm32(insn), fmul32i.fmz, FpRounding::RN, Scale::None, + fmul32i.sat != 0, fmul32i.cc != 0, false); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp new file mode 100644 index 000000000..f91b93fad --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp @@ -0,0 +1,41 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + SINCOS, + EX2, +}; + +void RRO(TranslatorVisitor& v, u64 insn, const IR::F32& src) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<39, 1, Mode> mode; + BitField<45, 1, u64> neg; + BitField<49, 1, u64> abs; + } const rro{insn}; + + v.F(rro.dest_reg, v.ir.FPAbsNeg(src, rro.abs != 0, rro.neg != 0)); +} +} // Anonymous namespace + +void TranslatorVisitor::RRO_reg(u64 insn) { + RRO(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::RRO_cbuf(u64 insn) { + RRO(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::RRO_imm(u64) { + throw NotImplementedException("RRO (imm)"); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp new file mode 100644 index 000000000..5f93a1513 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp @@ -0,0 +1,60 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
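+// FSETP: floating-point comparison whose result (and its negation) is combined with a source
+// predicate through a boolean op, producing a pair of destination predicates.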
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { + union { + u64 insn; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<6, 1, u64> negate_b; + BitField<7, 1, u64> abs_a; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<43, 1, u64> negate_a; + BitField<44, 1, u64> abs_b; + BitField<45, 2, BooleanOp> bop; + BitField<47, 1, u64> ftz; + BitField<48, 4, FPCompareOp> compare_op; + } const fsetp{insn}; + + const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)}; + const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0); + const IR::FpControl control{ + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), + }; + + const BooleanOp bop{fsetp.bop}; + const FPCompareOp compare_op{fsetp.compare_op}; + const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op, control)}; + const IR::U1 bop_pred{v.ir.GetPred(fsetp.bop_pred, fsetp.neg_bop_pred != 0)}; + const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; + const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; + v.ir.SetPred(fsetp.dest_pred_a, result_a); + v.ir.SetPred(fsetp.dest_pred_b, result_b); +} +} // Anonymous namespace + +void TranslatorVisitor::FSETP_reg(u64 insn) { + FSETP(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::FSETP_cbuf(u64 insn) { + FSETP(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FSETP_imm(u64 insn) { + FSETP(*this, insn, GetFloatImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp new file mode 100644 index 000000000..7550a8d4c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp @@ -0,0 +1,44 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +void TranslatorVisitor::FSWZADD(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<28, 8, u64> swizzle; + BitField<38, 1, u64> ndv; + BitField<39, 2, FpRounding> round; + BitField<44, 1, u64> ftz; + BitField<47, 1, u64> cc; + } const fswzadd{insn}; + + if (fswzadd.ndv != 0) { + throw NotImplementedException("FSWZADD NDV"); + } + + const IR::F32 src_a{GetFloatReg8(insn)}; + const IR::F32 src_b{GetFloatReg20(insn)}; + const IR::U32 swizzle{ir.Imm32(static_cast<u32>(fswzadd.swizzle))}; + + const IR::FpControl fp_control{ + .no_contraction = false, + .rounding = CastFpRounding(fswzadd.round), + .fmz_mode = (fswzadd.ftz != 0 ? 
IR::FmzMode::FTZ : IR::FmzMode::None), + }; + + const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)}; + F(fswzadd.dest_reg, result); + + if (fswzadd.cc != 0) { + throw NotImplementedException("FSWZADD CC"); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp new file mode 100644 index 000000000..f2738a93b --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp @@ -0,0 +1,125 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { +namespace { +void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a, + Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const hadd2{insn}; + + auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hadd2.src_a), swizzle_a)}; + auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; + const bool promotion{lhs_a.Type() != lhs_b.Type()}; + if (promotion) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } + } + lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a); + rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a); + + lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); + rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); + + const IR::FpControl fp_control{ + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (ftz ? 
IR::FmzMode::FTZ : IR::FmzMode::None), + }; + IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)}; + IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)}; + if (sat) { + lhs = v.ir.FPSaturate(lhs); + rhs = v.ir.FPSaturate(rhs); + } + if (promotion) { + lhs = v.ir.FPConvert(16, lhs); + rhs = v.ir.FPConvert(16, rhs); + } + v.X(hadd2.dest_reg, MergeResult(v.ir, hadd2.dest_reg, lhs, rhs, merge)); +} + +void HADD2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_b, bool neg_b, Swizzle swizzle_b, + const IR::U32& src_b) { + union { + u64 raw; + BitField<49, 2, Merge> merge; + BitField<39, 1, u64> ftz; + BitField<43, 1, u64> neg_a; + BitField<44, 1, u64> abs_a; + BitField<47, 2, Swizzle> swizzle_a; + } const hadd2{insn}; + + HADD2(v, insn, hadd2.merge, hadd2.ftz != 0, sat, hadd2.abs_a != 0, hadd2.neg_a != 0, + hadd2.swizzle_a, abs_b, neg_b, swizzle_b, src_b); +} +} // Anonymous namespace + +void TranslatorVisitor::HADD2_reg(u64 insn) { + union { + u64 raw; + BitField<32, 1, u64> sat; + BitField<31, 1, u64> neg_b; + BitField<30, 1, u64> abs_b; + BitField<28, 2, Swizzle> swizzle_b; + } const hadd2{insn}; + + HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, hadd2.swizzle_b, + GetReg20(insn)); +} + +void TranslatorVisitor::HADD2_cbuf(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> sat; + BitField<56, 1, u64> neg_b; + BitField<54, 1, u64> abs_b; + } const hadd2{insn}; + + HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, Swizzle::F32, + GetCbuf(insn)); +} + +void TranslatorVisitor::HADD2_imm(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> sat; + BitField<56, 1, u64> neg_high; + BitField<30, 9, u64> high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low; + } const hadd2{insn}; + + const u32 imm{ + static_cast<u32>(hadd2.low << 6) | static_cast<u32>((hadd2.neg_low != 0 ? 1 : 0) << 15) | + static_cast<u32>(hadd2.high << 22) | static_cast<u32>((hadd2.neg_high != 0 ? 1 : 0) << 31)}; + HADD2(*this, insn, hadd2.sat != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm)); +} + +void TranslatorVisitor::HADD2_32I(u64 insn) { + union { + u64 raw; + BitField<55, 1, u64> ftz; + BitField<52, 1, u64> sat; + BitField<56, 1, u64> neg_a; + BitField<53, 2, Swizzle> swizzle_a; + BitField<20, 32, u64> imm32; + } const hadd2{insn}; + + const u32 imm{static_cast<u32>(hadd2.imm32)}; + HADD2(*this, insn, Merge::H1_H0, hadd2.ftz != 0, hadd2.sat != 0, false, hadd2.neg_a != 0, + hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm)); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp new file mode 100644 index 000000000..fd7986701 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp @@ -0,0 +1,169 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
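+// HFMA2: fused multiply-add on packed half-precision pairs. Operands are extracted per swizzle,
+// promoted to F32 when lane types differ, and the two results are merged back into the destination.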
+ +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { +namespace { +void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c, + Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, + bool sat, HalfPrecision precision) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const hfma2{insn}; + + auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hfma2.src_a), swizzle_a)}; + auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; + auto [lhs_c, rhs_c]{Extract(v.ir, src_c, swizzle_c)}; + const bool promotion{lhs_a.Type() != lhs_b.Type() || lhs_a.Type() != lhs_c.Type()}; + if (promotion) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } + if (lhs_c.Type() == IR::Type::F16) { + lhs_c = v.ir.FPConvert(32, lhs_c); + rhs_c = v.ir.FPConvert(32, rhs_c); + } + } + + lhs_b = v.ir.FPAbsNeg(lhs_b, false, neg_b); + rhs_b = v.ir.FPAbsNeg(rhs_b, false, neg_b); + + lhs_c = v.ir.FPAbsNeg(lhs_c, false, neg_c); + rhs_c = v.ir.FPAbsNeg(rhs_c, false, neg_c); + + const IR::FpControl fp_control{ + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = HalfPrecision2FmzMode(precision), + }; + IR::F16F32F64 lhs{v.ir.FPFma(lhs_a, lhs_b, lhs_c, fp_control)}; + IR::F16F32F64 rhs{v.ir.FPFma(rhs_a, rhs_b, rhs_c, fp_control)}; + if (precision == HalfPrecision::FMZ && !sat) { + // Do not implement FMZ if SAT is enabled, as it does the logic for us. + // On D3D9 mode, anything * 0 is zero, even NAN and infinity + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)}; + const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)}; + const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)}; + lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, lhs_c, lhs)}; + + const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)}; + const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)}; + const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)}; + rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, rhs_c, rhs)}; + } + if (sat) { + lhs = v.ir.FPSaturate(lhs); + rhs = v.ir.FPSaturate(rhs); + } + if (promotion) { + lhs = v.ir.FPConvert(16, lhs); + rhs = v.ir.FPConvert(16, rhs); + } + v.X(hfma2.dest_reg, MergeResult(v.ir, hfma2.dest_reg, lhs, rhs, merge)); +} + +void HFMA2(TranslatorVisitor& v, u64 insn, bool neg_b, bool neg_c, Swizzle swizzle_b, + Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, bool sat, + HalfPrecision precision) { + union { + u64 raw; + BitField<47, 2, Swizzle> swizzle_a; + BitField<49, 2, Merge> merge; + } const hfma2{insn}; + + HFMA2(v, insn, hfma2.merge, hfma2.swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b, src_c, + sat, precision); +} +} // Anonymous namespace + +void TranslatorVisitor::HFMA2_reg(u64 insn) { + union { + u64 raw; + BitField<28, 2, Swizzle> swizzle_b; + BitField<32, 1, u64> saturate; + BitField<31, 1, u64> neg_b; + BitField<30, 1, u64> neg_c; + BitField<35, 2, Swizzle> swizzle_c; + BitField<37, 2, HalfPrecision> precision; + } const hfma2{insn}; + + HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, hfma2.swizzle_c, + GetReg20(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision); +} + +void TranslatorVisitor::HFMA2_rc(u64 insn) { + union { + u64 raw; + 
BitField<51, 1, u64> neg_c; + BitField<52, 1, u64> saturate; + BitField<53, 2, Swizzle> swizzle_b; + BitField<56, 1, u64> neg_b; + BitField<57, 2, HalfPrecision> precision; + } const hfma2{insn}; + + HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, Swizzle::F32, + GetReg39(insn), GetCbuf(insn), hfma2.saturate != 0, hfma2.precision); +} + +void TranslatorVisitor::HFMA2_cr(u64 insn) { + union { + u64 raw; + BitField<51, 1, u64> neg_c; + BitField<52, 1, u64> saturate; + BitField<53, 2, Swizzle> swizzle_c; + BitField<56, 1, u64> neg_b; + BitField<57, 2, HalfPrecision> precision; + } const hfma2{insn}; + + HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, Swizzle::F32, hfma2.swizzle_c, + GetCbuf(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision); +} + +void TranslatorVisitor::HFMA2_imm(u64 insn) { + union { + u64 raw; + BitField<51, 1, u64> neg_c; + BitField<52, 1, u64> saturate; + BitField<53, 2, Swizzle> swizzle_c; + + BitField<56, 1, u64> neg_high; + BitField<30, 9, u64> high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low; + BitField<57, 2, HalfPrecision> precision; + } const hfma2{insn}; + + const u32 imm{ + static_cast<u32>(hfma2.low << 6) | static_cast<u32>((hfma2.neg_low != 0 ? 1 : 0) << 15) | + static_cast<u32>(hfma2.high << 22) | static_cast<u32>((hfma2.neg_high != 0 ? 1 : 0) << 31)}; + + HFMA2(*this, insn, false, hfma2.neg_c != 0, Swizzle::H1_H0, hfma2.swizzle_c, ir.Imm32(imm), + GetReg39(insn), hfma2.saturate != 0, hfma2.precision); +} + +void TranslatorVisitor::HFMA2_32I(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> src_c; + BitField<20, 32, u64> imm32; + BitField<52, 1, u64> neg_c; + BitField<53, 2, Swizzle> swizzle_a; + BitField<55, 2, HalfPrecision> precision; + } const hfma2{insn}; + + const u32 imm{static_cast<u32>(hfma2.imm32)}; + HFMA2(*this, insn, Merge::H1_H0, hfma2.swizzle_a, false, hfma2.neg_c != 0, Swizzle::H1_H0, + Swizzle::H1_H0, ir.Imm32(imm), X(hfma2.src_c), false, hfma2.precision); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp new file mode 100644 index 000000000..0dbeb7f56 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp @@ -0,0 +1,62 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
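+// Shared helpers for the packed half-precision translators: mapping the precision field to an
+// FMZ mode, extracting swizzled operand pairs, and merging per-lane results into a register.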
+ +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { + +IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision) { + switch (precision) { + case HalfPrecision::None: + return IR::FmzMode::None; + case HalfPrecision::FTZ: + return IR::FmzMode::FTZ; + case HalfPrecision::FMZ: + return IR::FmzMode::FMZ; + default: + return IR::FmzMode::DontCare; + } +} + +std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) { + switch (swizzle) { + case Swizzle::H1_H0: { + const IR::Value vector{ir.UnpackFloat2x16(value)}; + return {IR::F16{ir.CompositeExtract(vector, 0)}, IR::F16{ir.CompositeExtract(vector, 1)}}; + } + case Swizzle::H0_H0: { + const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 0)}; + return {scalar, scalar}; + } + case Swizzle::H1_H1: { + const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 1)}; + return {scalar, scalar}; + } + case Swizzle::F32: { + const IR::F32 scalar{ir.BitCast<IR::F32>(value)}; + return {scalar, scalar}; + } + } + throw InvalidArgument("Invalid swizzle {}", swizzle); +} + +IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs, + Merge merge) { + switch (merge) { + case Merge::H1_H0: + return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs)); + case Merge::F32: + return ir.BitCast<IR::U32, IR::F32>(ir.FPConvert(32, lhs)); + case Merge::MRG_H0: + case Merge::MRG_H1: { + const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))}; + const bool is_h0{merge == Merge::MRG_H0}; + const IR::F16 insert{ir.FPConvert(16, is_h0 ? lhs : rhs)}; + return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, is_h0 ? 0 : 1)); + } + } + throw InvalidArgument("Invalid merge {}", merge); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h new file mode 100644 index 000000000..59da56a7e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h @@ -0,0 +1,42 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
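+// Merge, Swizzle and HalfPrecision encodings plus helper declarations used by the packed
+// half-precision translators (HADD2, HMUL2, HFMA2, HSET2, HSETP2).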
+ +#pragma once + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { + +enum class Merge : u64 { + H1_H0, + F32, + MRG_H0, + MRG_H1, +}; + +enum class Swizzle : u64 { + H1_H0, + F32, + H0_H0, + H1_H1, +}; + +enum class HalfPrecision : u64 { + None = 0, + FTZ = 1, + FMZ = 2, +}; + +IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision); + +std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle); + +IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs, + Merge merge); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp new file mode 100644 index 000000000..3f548ce76 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp @@ -0,0 +1,143 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { +namespace { +void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a, + Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b, + HalfPrecision precision) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const hmul2{insn}; + + auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hmul2.src_a), swizzle_a)}; + auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; + const bool promotion{lhs_a.Type() != lhs_b.Type()}; + if (promotion) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } + } + lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a); + rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a); + + lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); + rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); + + const IR::FpControl fp_control{ + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = HalfPrecision2FmzMode(precision), + }; + IR::F16F32F64 lhs{v.ir.FPMul(lhs_a, lhs_b, fp_control)}; + IR::F16F32F64 rhs{v.ir.FPMul(rhs_a, rhs_b, fp_control)}; + if (precision == HalfPrecision::FMZ && !sat) { + // Do not implement FMZ if SAT is enabled, as it does the logic for us. 
+ // On D3D9 mode, anything * 0 is zero, even NAN and infinity + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)}; + const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)}; + const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)}; + lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, zero, lhs)}; + + const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)}; + const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)}; + const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)}; + rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, zero, rhs)}; + } + if (sat) { + lhs = v.ir.FPSaturate(lhs); + rhs = v.ir.FPSaturate(rhs); + } + if (promotion) { + lhs = v.ir.FPConvert(16, lhs); + rhs = v.ir.FPConvert(16, rhs); + } + v.X(hmul2.dest_reg, MergeResult(v.ir, hmul2.dest_reg, lhs, rhs, merge)); +} + +void HMUL2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_a, bool neg_a, bool abs_b, bool neg_b, + Swizzle swizzle_b, const IR::U32& src_b) { + union { + u64 raw; + BitField<49, 2, Merge> merge; + BitField<47, 2, Swizzle> swizzle_a; + BitField<39, 2, HalfPrecision> precision; + } const hmul2{insn}; + + HMUL2(v, insn, hmul2.merge, sat, abs_a, neg_a, hmul2.swizzle_a, abs_b, neg_b, swizzle_b, src_b, + hmul2.precision); +} +} // Anonymous namespace + +void TranslatorVisitor::HMUL2_reg(u64 insn) { + union { + u64 raw; + BitField<32, 1, u64> sat; + BitField<31, 1, u64> neg_b; + BitField<30, 1, u64> abs_b; + BitField<44, 1, u64> abs_a; + BitField<28, 2, Swizzle> swizzle_b; + } const hmul2{insn}; + + HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, false, hmul2.abs_b != 0, hmul2.neg_b != 0, + hmul2.swizzle_b, GetReg20(insn)); +} + +void TranslatorVisitor::HMUL2_cbuf(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> sat; + BitField<54, 1, u64> abs_b; + BitField<43, 1, u64> neg_a; + BitField<44, 1, u64> abs_a; + } const hmul2{insn}; + + HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, hmul2.abs_b != 0, false, + Swizzle::F32, GetCbuf(insn)); +} + +void TranslatorVisitor::HMUL2_imm(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> sat; + BitField<56, 1, u64> neg_high; + BitField<30, 9, u64> high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low; + BitField<43, 1, u64> neg_a; + BitField<44, 1, u64> abs_a; + } const hmul2{insn}; + + const u32 imm{ + static_cast<u32>(hmul2.low << 6) | static_cast<u32>((hmul2.neg_low != 0 ? 1 : 0) << 15) | + static_cast<u32>(hmul2.high << 22) | static_cast<u32>((hmul2.neg_high != 0 ? 
1 : 0) << 31)}; + HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, false, false, + Swizzle::H1_H0, ir.Imm32(imm)); +} + +void TranslatorVisitor::HMUL2_32I(u64 insn) { + union { + u64 raw; + BitField<55, 2, HalfPrecision> precision; + BitField<52, 1, u64> sat; + BitField<53, 2, Swizzle> swizzle_a; + BitField<20, 32, u64> imm32; + } const hmul2{insn}; + + const u32 imm{static_cast<u32>(hmul2.imm32)}; + HMUL2(*this, insn, Merge::H1_H0, hmul2.sat != 0, false, false, hmul2.swizzle_a, false, false, + Swizzle::H1_H0, ir.Imm32(imm), hmul2.precision); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp new file mode 100644 index 000000000..cca5b831f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp @@ -0,0 +1,117 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { +namespace { +void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool ftz, bool neg_b, + bool abs_b, FPCompareOp compare_op, Swizzle swizzle_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> neg_a; + BitField<45, 2, BooleanOp> bop; + BitField<44, 1, u64> abs_a; + BitField<47, 2, Swizzle> swizzle_a; + } const hset2{insn}; + + auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hset2.src_a_reg), hset2.swizzle_a)}; + auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; + + if (lhs_a.Type() != lhs_b.Type()) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } + } + + lhs_a = v.ir.FPAbsNeg(lhs_a, hset2.abs_a != 0, hset2.neg_a != 0); + rhs_a = v.ir.FPAbsNeg(rhs_a, hset2.abs_a != 0, hset2.neg_a != 0); + + lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); + rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); + + const IR::FpControl control{ + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), + }; + + IR::U1 pred{v.ir.GetPred(hset2.pred)}; + if (hset2.neg_pred != 0) { + pred = v.ir.LogicalNot(pred); + } + const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)}; + const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)}; + const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hset2.bop)}; + const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hset2.bop)}; + + const u32 true_value = bf ? 
0x3c00 : 0xffff; + const IR::U32 true_val_lhs{v.ir.Imm32(true_value)}; + const IR::U32 true_val_rhs{v.ir.Imm32(true_value << 16)}; + const IR::U32 fail_result{v.ir.Imm32(0)}; + const IR::U32 result_lhs{v.ir.Select(bop_result_lhs, true_val_lhs, fail_result)}; + const IR::U32 result_rhs{v.ir.Select(bop_result_rhs, true_val_rhs, fail_result)}; + + v.X(hset2.dest_reg, IR::U32{v.ir.BitwiseOr(result_lhs, result_rhs)}); +} +} // Anonymous namespace + +void TranslatorVisitor::HSET2_reg(u64 insn) { + union { + u64 insn; + BitField<30, 1, u64> abs_b; + BitField<49, 1, u64> bf; + BitField<31, 1, u64> neg_b; + BitField<50, 1, u64> ftz; + BitField<35, 4, FPCompareOp> compare_op; + BitField<28, 2, Swizzle> swizzle_b; + } const hset2{insn}; + + HSET2(*this, insn, GetReg20(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, + hset2.abs_b != 0, hset2.compare_op, hset2.swizzle_b); +} + +void TranslatorVisitor::HSET2_cbuf(u64 insn) { + union { + u64 insn; + BitField<53, 1, u64> bf; + BitField<56, 1, u64> neg_b; + BitField<54, 1, u64> ftz; + BitField<49, 4, FPCompareOp> compare_op; + } const hset2{insn}; + + HSET2(*this, insn, GetCbuf(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, false, + hset2.compare_op, Swizzle::F32); +} + +void TranslatorVisitor::HSET2_imm(u64 insn) { + union { + u64 insn; + BitField<53, 1, u64> bf; + BitField<54, 1, u64> ftz; + BitField<49, 4, FPCompareOp> compare_op; + BitField<56, 1, u64> neg_high; + BitField<30, 9, u64> high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low; + } const hset2{insn}; + + const u32 imm{ + static_cast<u32>(hset2.low << 6) | static_cast<u32>((hset2.neg_low != 0 ? 1 : 0) << 15) | + static_cast<u32>(hset2.high << 22) | static_cast<u32>((hset2.neg_high != 0 ? 1 : 0) << 31)}; + + HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op, + Swizzle::H1_H0); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp new file mode 100644 index 000000000..b3931dae3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp @@ -0,0 +1,118 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
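+// HSETP2: packed half-precision comparison; writes one predicate per lane, or, with H_AND set,
+// the AND of both lanes and its complement.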
+ +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { +namespace { +void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bool abs_b, + Swizzle swizzle_b, FPCompareOp compare_op, bool h_and) { + union { + u64 insn; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> neg_a; + BitField<45, 2, BooleanOp> bop; + BitField<44, 1, u64> abs_a; + BitField<6, 1, u64> ftz; + BitField<47, 2, Swizzle> swizzle_a; + } const hsetp2{insn}; + + auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hsetp2.src_a_reg), hsetp2.swizzle_a)}; + auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; + + if (lhs_a.Type() != lhs_b.Type()) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } + } + + lhs_a = v.ir.FPAbsNeg(lhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0); + rhs_a = v.ir.FPAbsNeg(rhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0); + + lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); + rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); + + const IR::FpControl control{ + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), + }; + + IR::U1 pred{v.ir.GetPred(hsetp2.pred)}; + if (hsetp2.neg_pred != 0) { + pred = v.ir.LogicalNot(pred); + } + const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)}; + const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)}; + const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hsetp2.bop)}; + const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hsetp2.bop)}; + + if (h_and) { + auto result = v.ir.LogicalAnd(bop_result_lhs, bop_result_rhs); + v.ir.SetPred(hsetp2.dest_pred_a, result); + v.ir.SetPred(hsetp2.dest_pred_b, v.ir.LogicalNot(result)); + } else { + v.ir.SetPred(hsetp2.dest_pred_a, bop_result_lhs); + v.ir.SetPred(hsetp2.dest_pred_b, bop_result_rhs); + } +} +} // Anonymous namespace + +void TranslatorVisitor::HSETP2_reg(u64 insn) { + union { + u64 insn; + BitField<30, 1, u64> abs_b; + BitField<49, 1, u64> h_and; + BitField<31, 1, u64> neg_b; + BitField<35, 4, FPCompareOp> compare_op; + BitField<28, 2, Swizzle> swizzle_b; + } const hsetp2{insn}; + HSETP2(*this, insn, GetReg20(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, hsetp2.swizzle_b, + hsetp2.compare_op, hsetp2.h_and != 0); +} + +void TranslatorVisitor::HSETP2_cbuf(u64 insn) { + union { + u64 insn; + BitField<53, 1, u64> h_and; + BitField<54, 1, u64> abs_b; + BitField<56, 1, u64> neg_b; + BitField<49, 4, FPCompareOp> compare_op; + } const hsetp2{insn}; + + HSETP2(*this, insn, GetCbuf(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, Swizzle::F32, + hsetp2.compare_op, hsetp2.h_and != 0); +} + +void TranslatorVisitor::HSETP2_imm(u64 insn) { + union { + u64 insn; + BitField<53, 1, u64> h_and; + BitField<54, 1, u64> ftz; + BitField<49, 4, FPCompareOp> compare_op; + BitField<56, 1, u64> neg_high; + BitField<30, 9, u64> high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low; + } const hsetp2{insn}; + + const u32 imm{static_cast<u32>(hsetp2.low << 6) | + static_cast<u32>((hsetp2.neg_low != 0 ? 
1 : 0) << 15) | + static_cast<u32>(hsetp2.high << 22) | + static_cast<u32>((hsetp2.neg_high != 0 ? 1 : 0) << 31)}; + + HSETP2(*this, insn, ir.Imm32(imm), false, false, Swizzle::H1_H0, hsetp2.compare_op, + hsetp2.h_and != 0); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp new file mode 100644 index 000000000..b446aae0e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -0,0 +1,272 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +[[nodiscard]] IR::U32 CbufLowerBits(IR::IREmitter& ir, bool unaligned, const IR::U32& binding, + u32 offset) { + if (unaligned) { + return ir.Imm32(0); + } + return ir.GetCbuf(binding, IR::U32{IR::Value{offset}}); +} +} // Anonymous namespace + +IR::U32 TranslatorVisitor::X(IR::Reg reg) { + return ir.GetReg(reg); +} + +IR::U64 TranslatorVisitor::L(IR::Reg reg) { + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + return IR::U64{ir.PackUint2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))}; +} + +IR::F32 TranslatorVisitor::F(IR::Reg reg) { + return ir.BitCast<IR::F32>(X(reg)); +} + +IR::F64 TranslatorVisitor::D(IR::Reg reg) { + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + return IR::F64{ir.PackDouble2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))}; +} + +void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) { + ir.SetReg(dest_reg, value); +} + +void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) { + if (!IR::IsAligned(dest_reg, 2)) { + throw NotImplementedException("Unaligned destination register {}", dest_reg); + } + const IR::Value result{ir.UnpackUint2x32(value)}; + for (int i = 0; i < 2; i++) { + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))}); + } +} + +void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) { + X(dest_reg, ir.BitCast<IR::U32>(value)); +} + +void TranslatorVisitor::D(IR::Reg dest_reg, const IR::F64& value) { + if (!IR::IsAligned(dest_reg, 2)) { + throw NotImplementedException("Unaligned destination register {}", dest_reg); + } + const IR::Value result{ir.UnpackDouble2x32(value)}; + for (int i = 0; i < 2; i++) { + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))}); + } +} + +IR::U32 TranslatorVisitor::GetReg8(u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> index; + } const reg{insn}; + return X(reg.index); +} + +IR::U32 TranslatorVisitor::GetReg20(u64 insn) { + union { + u64 raw; + BitField<20, 8, IR::Reg> index; + } const reg{insn}; + return X(reg.index); +} + +IR::U32 TranslatorVisitor::GetReg39(u64 insn) { + union { + u64 raw; + BitField<39, 8, IR::Reg> index; + } const reg{insn}; + return X(reg.index); +} + +IR::F32 TranslatorVisitor::GetFloatReg8(u64 insn) { + return ir.BitCast<IR::F32>(GetReg8(insn)); +} + +IR::F32 TranslatorVisitor::GetFloatReg20(u64 insn) { + return ir.BitCast<IR::F32>(GetReg20(insn)); +} + +IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) { + return ir.BitCast<IR::F32>(GetReg39(insn)); +} + +IR::F64 TranslatorVisitor::GetDoubleReg20(u64 insn) { + union { + u64 
raw; + BitField<20, 8, IR::Reg> index; + } const reg{insn}; + return D(reg.index); +} + +IR::F64 TranslatorVisitor::GetDoubleReg39(u64 insn) { + union { + u64 raw; + BitField<39, 8, IR::Reg> index; + } const reg{insn}; + return D(reg.index); +} + +static std::pair<IR::U32, IR::U32> CbufAddr(u64 insn) { + union { + u64 raw; + BitField<20, 14, u64> offset; + BitField<34, 5, u64> binding; + } const cbuf{insn}; + + if (cbuf.binding >= 18) { + throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding); + } + if (cbuf.offset >= 0x10'000) { + throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset); + } + const IR::Value binding{static_cast<u32>(cbuf.binding)}; + const IR::Value byte_offset{static_cast<u32>(cbuf.offset) * 4}; + return {IR::U32{binding}, IR::U32{byte_offset}}; +} + +IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { + const auto [binding, byte_offset]{CbufAddr(insn)}; + return ir.GetCbuf(binding, byte_offset); +} + +IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) { + const auto [binding, byte_offset]{CbufAddr(insn)}; + return ir.GetFloatCbuf(binding, byte_offset); +} + +IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) { + union { + u64 raw; + BitField<20, 1, u64> unaligned; + } const cbuf{insn}; + + const auto [binding, offset_value]{CbufAddr(insn)}; + const bool unaligned{cbuf.unaligned != 0}; + const u32 offset{offset_value.U32()}; + const IR::Value addr{unaligned ? offset | 4u : (offset & ~7u) | 4u}; + + const IR::U32 value{ir.GetCbuf(binding, IR::U32{addr})}; + const IR::U32 lower_bits{CbufLowerBits(ir, unaligned, binding, offset)}; + return ir.PackDouble2x32(ir.CompositeConstruct(lower_bits, value)); +} + +IR::U64 TranslatorVisitor::GetPackedCbuf(u64 insn) { + union { + u64 raw; + BitField<20, 1, u64> unaligned; + } const cbuf{insn}; + + if (cbuf.unaligned != 0) { + throw NotImplementedException("Unaligned packed constant buffer read"); + } + const auto [binding, lower_offset]{CbufAddr(insn)}; + const IR::U32 upper_offset{ir.Imm32(lower_offset.U32() + 4)}; + const IR::U32 lower_value{ir.GetCbuf(binding, lower_offset)}; + const IR::U32 upper_value{ir.GetCbuf(binding, upper_offset)}; + return ir.PackUint2x32(ir.CompositeConstruct(lower_value, upper_value)); +} + +IR::U32 TranslatorVisitor::GetImm20(u64 insn) { + union { + u64 raw; + BitField<20, 19, u64> value; + BitField<56, 1, u64> is_negative; + } const imm{insn}; + + if (imm.is_negative != 0) { + const s64 raw{static_cast<s64>(imm.value)}; + return ir.Imm32(static_cast<s32>(-(1LL << 19) + raw)); + } else { + return ir.Imm32(static_cast<u32>(imm.value)); + } +} + +IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) { + union { + u64 raw; + BitField<20, 19, u64> value; + BitField<56, 1, u64> is_negative; + } const imm{insn}; + const u32 sign_bit{static_cast<u32>(imm.is_negative != 0 ? (1ULL << 31) : 0)}; + const u32 value{static_cast<u32>(imm.value) << 12}; + return ir.Imm32(Common::BitCast<f32>(value | sign_bit)); +} + +IR::F64 TranslatorVisitor::GetDoubleImm20(u64 insn) { + union { + u64 raw; + BitField<20, 19, u64> value; + BitField<56, 1, u64> is_negative; + } const imm{insn}; + const u64 sign_bit{imm.is_negative != 0 ? 
(1ULL << 63) : 0}; + const u64 value{imm.value << 44}; + return ir.Imm64(Common::BitCast<f64>(value | sign_bit)); +} + +IR::U64 TranslatorVisitor::GetPackedImm20(u64 insn) { + const s64 value{GetImm20(insn).U32()}; + return ir.Imm64(static_cast<u64>(static_cast<s64>(value) << 32)); +} + +IR::U32 TranslatorVisitor::GetImm32(u64 insn) { + union { + u64 raw; + BitField<20, 32, u64> value; + } const imm{insn}; + return ir.Imm32(static_cast<u32>(imm.value)); +} + +IR::F32 TranslatorVisitor::GetFloatImm32(u64 insn) { + union { + u64 raw; + BitField<20, 32, u64> value; + } const imm{insn}; + return ir.Imm32(Common::BitCast<f32>(static_cast<u32>(imm.value))); +} + +void TranslatorVisitor::SetZFlag(const IR::U1& value) { + ir.SetZFlag(value); +} + +void TranslatorVisitor::SetSFlag(const IR::U1& value) { + ir.SetSFlag(value); +} + +void TranslatorVisitor::SetCFlag(const IR::U1& value) { + ir.SetCFlag(value); +} + +void TranslatorVisitor::SetOFlag(const IR::U1& value) { + ir.SetOFlag(value); +} + +void TranslatorVisitor::ResetZero() { + SetZFlag(ir.Imm1(false)); +} + +void TranslatorVisitor::ResetSFlag() { + SetSFlag(ir.Imm1(false)); +} + +void TranslatorVisitor::ResetCFlag() { + SetCFlag(ir.Imm1(false)); +} + +void TranslatorVisitor::ResetOFlag() { + SetOFlag(ir.Imm1(false)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h new file mode 100644 index 000000000..335e4f24f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -0,0 +1,387 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/maxwell/instruction.h" + +namespace Shader::Maxwell { + +enum class CompareOp : u64 { + False, + LessThan, + Equal, + LessThanEqual, + GreaterThan, + NotEqual, + GreaterThanEqual, + True, +}; + +enum class BooleanOp : u64 { + AND, + OR, + XOR, +}; + +enum class PredicateOp : u64 { + False, + True, + Zero, + NonZero, +}; + +enum class FPCompareOp : u64 { + F, + LT, + EQ, + LE, + GT, + NE, + GE, + NUM, + Nan, + LTU, + EQU, + LEU, + GTU, + NEU, + GEU, + T, +}; + +class TranslatorVisitor { +public: + explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {} + + Environment& env; + IR::IREmitter ir; + + void AL2P(u64 insn); + void ALD(u64 insn); + void AST(u64 insn); + void ATOM_cas(u64 insn); + void ATOM(u64 insn); + void ATOMS_cas(u64 insn); + void ATOMS(u64 insn); + void B2R(u64 insn); + void BAR(u64 insn); + void BFE_reg(u64 insn); + void BFE_cbuf(u64 insn); + void BFE_imm(u64 insn); + void BFI_reg(u64 insn); + void BFI_rc(u64 insn); + void BFI_cr(u64 insn); + void BFI_imm(u64 insn); + void BPT(u64 insn); + void BRA(u64 insn); + void BRK(u64 insn); + void BRX(u64 insn); + void CAL(); + void CCTL(u64 insn); + void CCTLL(u64 insn); + void CONT(u64 insn); + void CS2R(u64 insn); + void CSET(u64 insn); + void CSETP(u64 insn); + void DADD_reg(u64 insn); + void DADD_cbuf(u64 insn); + void DADD_imm(u64 insn); + void DEPBAR(); + void DFMA_reg(u64 insn); + void DFMA_rc(u64 insn); + void DFMA_cr(u64 insn); + void DFMA_imm(u64 insn); + void DMNMX_reg(u64 insn); + void DMNMX_cbuf(u64 insn); + void DMNMX_imm(u64 insn); + void DMUL_reg(u64 insn); + void 
DMUL_cbuf(u64 insn); + void DMUL_imm(u64 insn); + void DSET_reg(u64 insn); + void DSET_cbuf(u64 insn); + void DSET_imm(u64 insn); + void DSETP_reg(u64 insn); + void DSETP_cbuf(u64 insn); + void DSETP_imm(u64 insn); + void EXIT(); + void F2F_reg(u64 insn); + void F2F_cbuf(u64 insn); + void F2F_imm(u64 insn); + void F2I_reg(u64 insn); + void F2I_cbuf(u64 insn); + void F2I_imm(u64 insn); + void FADD_reg(u64 insn); + void FADD_cbuf(u64 insn); + void FADD_imm(u64 insn); + void FADD32I(u64 insn); + void FCHK_reg(u64 insn); + void FCHK_cbuf(u64 insn); + void FCHK_imm(u64 insn); + void FCMP_reg(u64 insn); + void FCMP_rc(u64 insn); + void FCMP_cr(u64 insn); + void FCMP_imm(u64 insn); + void FFMA_reg(u64 insn); + void FFMA_rc(u64 insn); + void FFMA_cr(u64 insn); + void FFMA_imm(u64 insn); + void FFMA32I(u64 insn); + void FLO_reg(u64 insn); + void FLO_cbuf(u64 insn); + void FLO_imm(u64 insn); + void FMNMX_reg(u64 insn); + void FMNMX_cbuf(u64 insn); + void FMNMX_imm(u64 insn); + void FMUL_reg(u64 insn); + void FMUL_cbuf(u64 insn); + void FMUL_imm(u64 insn); + void FMUL32I(u64 insn); + void FSET_reg(u64 insn); + void FSET_cbuf(u64 insn); + void FSET_imm(u64 insn); + void FSETP_reg(u64 insn); + void FSETP_cbuf(u64 insn); + void FSETP_imm(u64 insn); + void FSWZADD(u64 insn); + void GETCRSPTR(u64 insn); + void GETLMEMBASE(u64 insn); + void HADD2_reg(u64 insn); + void HADD2_cbuf(u64 insn); + void HADD2_imm(u64 insn); + void HADD2_32I(u64 insn); + void HFMA2_reg(u64 insn); + void HFMA2_rc(u64 insn); + void HFMA2_cr(u64 insn); + void HFMA2_imm(u64 insn); + void HFMA2_32I(u64 insn); + void HMUL2_reg(u64 insn); + void HMUL2_cbuf(u64 insn); + void HMUL2_imm(u64 insn); + void HMUL2_32I(u64 insn); + void HSET2_reg(u64 insn); + void HSET2_cbuf(u64 insn); + void HSET2_imm(u64 insn); + void HSETP2_reg(u64 insn); + void HSETP2_cbuf(u64 insn); + void HSETP2_imm(u64 insn); + void I2F_reg(u64 insn); + void I2F_cbuf(u64 insn); + void I2F_imm(u64 insn); + void I2I_reg(u64 insn); + void I2I_cbuf(u64 insn); + void I2I_imm(u64 insn); + void IADD_reg(u64 insn); + void IADD_cbuf(u64 insn); + void IADD_imm(u64 insn); + void IADD3_reg(u64 insn); + void IADD3_cbuf(u64 insn); + void IADD3_imm(u64 insn); + void IADD32I(u64 insn); + void ICMP_reg(u64 insn); + void ICMP_rc(u64 insn); + void ICMP_cr(u64 insn); + void ICMP_imm(u64 insn); + void IDE(u64 insn); + void IDP_reg(u64 insn); + void IDP_imm(u64 insn); + void IMAD_reg(u64 insn); + void IMAD_rc(u64 insn); + void IMAD_cr(u64 insn); + void IMAD_imm(u64 insn); + void IMAD32I(u64 insn); + void IMADSP_reg(u64 insn); + void IMADSP_rc(u64 insn); + void IMADSP_cr(u64 insn); + void IMADSP_imm(u64 insn); + void IMNMX_reg(u64 insn); + void IMNMX_cbuf(u64 insn); + void IMNMX_imm(u64 insn); + void IMUL_reg(u64 insn); + void IMUL_cbuf(u64 insn); + void IMUL_imm(u64 insn); + void IMUL32I(u64 insn); + void IPA(u64 insn); + void ISBERD(u64 insn); + void ISCADD_reg(u64 insn); + void ISCADD_cbuf(u64 insn); + void ISCADD_imm(u64 insn); + void ISCADD32I(u64 insn); + void ISET_reg(u64 insn); + void ISET_cbuf(u64 insn); + void ISET_imm(u64 insn); + void ISETP_reg(u64 insn); + void ISETP_cbuf(u64 insn); + void ISETP_imm(u64 insn); + void JCAL(u64 insn); + void JMP(u64 insn); + void JMX(u64 insn); + void KIL(); + void LD(u64 insn); + void LDC(u64 insn); + void LDG(u64 insn); + void LDL(u64 insn); + void LDS(u64 insn); + void LEA_hi_reg(u64 insn); + void LEA_hi_cbuf(u64 insn); + void LEA_lo_reg(u64 insn); + void LEA_lo_cbuf(u64 insn); + void LEA_lo_imm(u64 insn); + void LEPC(u64 insn); + void LONGJMP(u64 
insn); + void LOP_reg(u64 insn); + void LOP_cbuf(u64 insn); + void LOP_imm(u64 insn); + void LOP3_reg(u64 insn); + void LOP3_cbuf(u64 insn); + void LOP3_imm(u64 insn); + void LOP32I(u64 insn); + void MEMBAR(u64 insn); + void MOV_reg(u64 insn); + void MOV_cbuf(u64 insn); + void MOV_imm(u64 insn); + void MOV32I(u64 insn); + void MUFU(u64 insn); + void NOP(u64 insn); + void OUT_reg(u64 insn); + void OUT_cbuf(u64 insn); + void OUT_imm(u64 insn); + void P2R_reg(u64 insn); + void P2R_cbuf(u64 insn); + void P2R_imm(u64 insn); + void PBK(); + void PCNT(); + void PEXIT(u64 insn); + void PIXLD(u64 insn); + void PLONGJMP(u64 insn); + void POPC_reg(u64 insn); + void POPC_cbuf(u64 insn); + void POPC_imm(u64 insn); + void PRET(u64 insn); + void PRMT_reg(u64 insn); + void PRMT_rc(u64 insn); + void PRMT_cr(u64 insn); + void PRMT_imm(u64 insn); + void PSET(u64 insn); + void PSETP(u64 insn); + void R2B(u64 insn); + void R2P_reg(u64 insn); + void R2P_cbuf(u64 insn); + void R2P_imm(u64 insn); + void RAM(u64 insn); + void RED(u64 insn); + void RET(u64 insn); + void RRO_reg(u64 insn); + void RRO_cbuf(u64 insn); + void RRO_imm(u64 insn); + void RTT(u64 insn); + void S2R(u64 insn); + void SAM(u64 insn); + void SEL_reg(u64 insn); + void SEL_cbuf(u64 insn); + void SEL_imm(u64 insn); + void SETCRSPTR(u64 insn); + void SETLMEMBASE(u64 insn); + void SHF_l_reg(u64 insn); + void SHF_l_imm(u64 insn); + void SHF_r_reg(u64 insn); + void SHF_r_imm(u64 insn); + void SHFL(u64 insn); + void SHL_reg(u64 insn); + void SHL_cbuf(u64 insn); + void SHL_imm(u64 insn); + void SHR_reg(u64 insn); + void SHR_cbuf(u64 insn); + void SHR_imm(u64 insn); + void SSY(); + void ST(u64 insn); + void STG(u64 insn); + void STL(u64 insn); + void STP(u64 insn); + void STS(u64 insn); + void SUATOM(u64 insn); + void SUATOM_cas(u64 insn); + void SULD(u64 insn); + void SURED(u64 insn); + void SUST(u64 insn); + void SYNC(u64 insn); + void TEX(u64 insn); + void TEX_b(u64 insn); + void TEXS(u64 insn); + void TLD(u64 insn); + void TLD_b(u64 insn); + void TLD4(u64 insn); + void TLD4_b(u64 insn); + void TLD4S(u64 insn); + void TLDS(u64 insn); + void TMML(u64 insn); + void TMML_b(u64 insn); + void TXA(u64 insn); + void TXD(u64 insn); + void TXD_b(u64 insn); + void TXQ(u64 insn); + void TXQ_b(u64 insn); + void VABSDIFF(u64 insn); + void VABSDIFF4(u64 insn); + void VADD(u64 insn); + void VMAD(u64 insn); + void VMNMX(u64 insn); + void VOTE(u64 insn); + void VOTE_vtg(u64 insn); + void VSET(u64 insn); + void VSETP(u64 insn); + void VSHL(u64 insn); + void VSHR(u64 insn); + void XMAD_reg(u64 insn); + void XMAD_rc(u64 insn); + void XMAD_cr(u64 insn); + void XMAD_imm(u64 insn); + + [[nodiscard]] IR::U32 X(IR::Reg reg); + [[nodiscard]] IR::U64 L(IR::Reg reg); + [[nodiscard]] IR::F32 F(IR::Reg reg); + [[nodiscard]] IR::F64 D(IR::Reg reg); + + void X(IR::Reg dest_reg, const IR::U32& value); + void L(IR::Reg dest_reg, const IR::U64& value); + void F(IR::Reg dest_reg, const IR::F32& value); + void D(IR::Reg dest_reg, const IR::F64& value); + + [[nodiscard]] IR::U32 GetReg8(u64 insn); + [[nodiscard]] IR::U32 GetReg20(u64 insn); + [[nodiscard]] IR::U32 GetReg39(u64 insn); + [[nodiscard]] IR::F32 GetFloatReg8(u64 insn); + [[nodiscard]] IR::F32 GetFloatReg20(u64 insn); + [[nodiscard]] IR::F32 GetFloatReg39(u64 insn); + [[nodiscard]] IR::F64 GetDoubleReg20(u64 insn); + [[nodiscard]] IR::F64 GetDoubleReg39(u64 insn); + + [[nodiscard]] IR::U32 GetCbuf(u64 insn); + [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn); + [[nodiscard]] IR::F64 GetDoubleCbuf(u64 insn); + [[nodiscard]] IR::U64 
GetPackedCbuf(u64 insn); + + [[nodiscard]] IR::U32 GetImm20(u64 insn); + [[nodiscard]] IR::F32 GetFloatImm20(u64 insn); + [[nodiscard]] IR::F64 GetDoubleImm20(u64 insn); + [[nodiscard]] IR::U64 GetPackedImm20(u64 insn); + + [[nodiscard]] IR::U32 GetImm32(u64 insn); + [[nodiscard]] IR::F32 GetFloatImm32(u64 insn); + + void SetZFlag(const IR::U1& value); + void SetSFlag(const IR::U1& value); + void SetCFlag(const IR::U1& value); + void SetOFlag(const IR::U1& value); + + void ResetZero(); + void ResetSFlag(); + void ResetCFlag(); + void ResetOFlag(); +}; + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp new file mode 100644 index 000000000..8ffd84867 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp @@ -0,0 +1,105 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void IADD(TranslatorVisitor& v, u64 insn, const IR::U32 op_b, bool neg_a, bool po, bool sat, bool x, + bool cc) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const iadd{insn}; + + if (sat) { + throw NotImplementedException("IADD SAT"); + } + if (x && po) { + throw NotImplementedException("IADD X+PO"); + } + // Operand A is always read from here, negated if needed + IR::U32 op_a{v.X(iadd.src_a)}; + if (neg_a) { + op_a = v.ir.INeg(op_a); + } + // Add both operands + IR::U32 result{v.ir.IAdd(op_a, op_b)}; + if (x) { + const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; + result = v.ir.IAdd(result, carry); + } + if (po) { + // .PO adds one to the result + result = v.ir.IAdd(result, v.ir.Imm32(1)); + } + if (cc) { + // Store flags + // TODO: Does this grab the result pre-PO or after? + if (po) { + throw NotImplementedException("IADD CC+PO"); + } + // TODO: How does CC behave when X is set? 
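+        // X combined with CC is rejected below; otherwise Z, S, C and O are taken directly from
+        // the final 32-bit sum.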
+ if (x) { + throw NotImplementedException("IADD X+CC"); + } + v.SetZFlag(v.ir.GetZeroFromOp(result)); + v.SetSFlag(v.ir.GetSignFromOp(result)); + v.SetCFlag(v.ir.GetCarryFromOp(result)); + v.SetOFlag(v.ir.GetOverflowFromOp(result)); + } + // Store result + v.X(iadd.dest_reg, result); +} + +void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { + union { + u64 insn; + BitField<43, 1, u64> x; + BitField<47, 1, u64> cc; + BitField<48, 2, u64> three_for_po; + BitField<48, 1, u64> neg_b; + BitField<49, 1, u64> neg_a; + BitField<50, 1, u64> sat; + } const iadd{insn}; + + const bool po{iadd.three_for_po == 3}; + if (!po && iadd.neg_b != 0) { + op_b = v.ir.INeg(op_b); + } + IADD(v, insn, op_b, iadd.neg_a != 0, po, iadd.sat != 0, iadd.x != 0, iadd.cc != 0); +} +} // Anonymous namespace + +void TranslatorVisitor::IADD_reg(u64 insn) { + IADD(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::IADD_cbuf(u64 insn) { + IADD(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::IADD_imm(u64 insn) { + IADD(*this, insn, GetImm20(insn)); +} + +void TranslatorVisitor::IADD32I(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> cc; + BitField<53, 1, u64> x; + BitField<54, 1, u64> sat; + BitField<55, 2, u64> three_for_po; + BitField<56, 1, u64> neg_a; + } const iadd32i{insn}; + + const bool po{iadd32i.three_for_po == 3}; + const bool neg_a{!po && iadd32i.neg_a != 0}; + IADD(*this, insn, GetImm32(insn), neg_a, po, iadd32i.sat != 0, iadd32i.x != 0, iadd32i.cc != 0); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp new file mode 100644 index 000000000..040cfc10f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp @@ -0,0 +1,122 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
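+// IADD3: three-input integer add. Optional half selectors take the low or high 16 bits of each
+// source, and the intermediate A+B sum may be shifted left or right by 16 before C is added.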
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Shift : u64 { + None, + Right, + Left, +}; +enum class Half : u64 { + All, + Lower, + Upper, +}; + +[[nodiscard]] IR::U32 IntegerHalf(IR::IREmitter& ir, const IR::U32& value, Half half) { + constexpr bool is_signed{false}; + switch (half) { + case Half::All: + return value; + case Half::Lower: + return ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(16), is_signed); + case Half::Upper: + return ir.BitFieldExtract(value, ir.Imm32(16), ir.Imm32(16), is_signed); + } + throw NotImplementedException("Invalid half"); +} + +[[nodiscard]] IR::U32 IntegerShift(IR::IREmitter& ir, const IR::U32& value, Shift shift) { + switch (shift) { + case Shift::None: + return value; + case Shift::Right: { + // 33-bit RS IADD3 edge case + const IR::U1 edge_case{ir.GetCarryFromOp(value)}; + const IR::U32 shifted{ir.ShiftRightLogical(value, ir.Imm32(16))}; + return IR::U32{ir.Select(edge_case, ir.IAdd(shifted, ir.Imm32(0x10000)), shifted)}; + } + case Shift::Left: + return ir.ShiftLeftLogical(value, ir.Imm32(16)); + } + throw NotImplementedException("Invalid shift"); +} + +void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 op_c, + Shift shift = Shift::None) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> x; + BitField<49, 1, u64> neg_c; + BitField<50, 1, u64> neg_b; + BitField<51, 1, u64> neg_a; + } iadd3{insn}; + + if (iadd3.neg_a != 0) { + op_a = v.ir.INeg(op_a); + } + if (iadd3.neg_b != 0) { + op_b = v.ir.INeg(op_b); + } + if (iadd3.neg_c != 0) { + op_c = v.ir.INeg(op_c); + } + IR::U32 lhs_1{v.ir.IAdd(op_a, op_b)}; + if (iadd3.x != 0) { + // TODO: How does RS behave when X is set? + if (shift == Shift::Right) { + throw NotImplementedException("IADD3 X+RS"); + } + const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; + lhs_1 = v.ir.IAdd(lhs_1, carry); + } + const IR::U32 lhs_2{IntegerShift(v.ir, lhs_1, shift)}; + const IR::U32 result{v.ir.IAdd(lhs_2, op_c)}; + + v.X(iadd3.dest_reg, result); + if (iadd3.cc != 0) { + // TODO: How does CC behave when X is set? 
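+        // As in IADD, X combined with CC is rejected. The overflow flag additionally ORs in
+        // wraparound of the intermediate A+B sum (of_1).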
+ if (iadd3.x != 0) { + throw NotImplementedException("IADD3 X+CC"); + } + v.SetZFlag(v.ir.GetZeroFromOp(result)); + v.SetSFlag(v.ir.GetSignFromOp(result)); + v.SetCFlag(v.ir.GetCarryFromOp(result)); + const IR::U1 of_1{v.ir.ILessThan(lhs_1, op_a, false)}; + v.SetOFlag(v.ir.LogicalOr(v.ir.GetOverflowFromOp(result), of_1)); + } +} +} // Anonymous namespace + +void TranslatorVisitor::IADD3_reg(u64 insn) { + union { + u64 insn; + BitField<37, 2, Shift> shift; + BitField<35, 2, Half> half_a; + BitField<33, 2, Half> half_b; + BitField<31, 2, Half> half_c; + } const iadd3{insn}; + + const auto op_a{IntegerHalf(ir, GetReg8(insn), iadd3.half_a)}; + const auto op_b{IntegerHalf(ir, GetReg20(insn), iadd3.half_b)}; + const auto op_c{IntegerHalf(ir, GetReg39(insn), iadd3.half_c)}; + IADD3(*this, insn, op_a, op_b, op_c, iadd3.shift); +} + +void TranslatorVisitor::IADD3_cbuf(u64 insn) { + IADD3(*this, insn, GetReg8(insn), GetCbuf(insn), GetReg39(insn)); +} + +void TranslatorVisitor::IADD3_imm(u64 insn) { + IADD3(*this, insn, GetReg8(insn), GetImm20(insn), GetReg39(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp new file mode 100644 index 000000000..ba6e01926 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp @@ -0,0 +1,48 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void ICMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& operand) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<48, 1, u64> is_signed; + BitField<49, 3, CompareOp> compare_op; + } const icmp{insn}; + + const IR::U32 zero{v.ir.Imm32(0)}; + const bool is_signed{icmp.is_signed != 0}; + const IR::U1 cmp_result{IntegerCompare(v.ir, operand, zero, icmp.compare_op, is_signed)}; + + const IR::U32 src_reg{v.X(icmp.src_reg)}; + const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; + + v.X(icmp.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::ICMP_reg(u64 insn) { + ICMP(*this, insn, GetReg20(insn), GetReg39(insn)); +} + +void TranslatorVisitor::ICMP_rc(u64 insn) { + ICMP(*this, insn, GetReg39(insn), GetCbuf(insn)); +} + +void TranslatorVisitor::ICMP_cr(u64 insn) { + ICMP(*this, insn, GetCbuf(insn), GetReg39(insn)); +} + +void TranslatorVisitor::ICMP_imm(u64 insn) { + ICMP(*this, insn, GetImm20(insn), GetReg39(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp new file mode 100644 index 000000000..8ce1aee04 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp @@ -0,0 +1,80 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
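+// ISET: compares two integers, combines the comparison with a predicate through a boolean op, and
+// writes either all ones (1.0f with .BF) or zero to the destination register.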
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +IR::U1 IsetCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, + CompareOp compare_op, bool is_signed, bool x) { + return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed) + : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed); +} + +void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> x; + BitField<44, 1, u64> bf; + BitField<45, 2, BooleanOp> bop; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> is_signed; + BitField<49, 3, CompareOp> compare_op; + } const iset{insn}; + + const IR::U32 src_a{v.X(iset.src_reg)}; + const bool is_signed{iset.is_signed != 0}; + const IR::U32 zero{v.ir.Imm32(0)}; + const bool x{iset.x != 0}; + const IR::U1 cmp_result{IsetCompare(v.ir, src_a, src_b, iset.compare_op, is_signed, x)}; + + IR::U1 pred{v.ir.GetPred(iset.pred)}; + if (iset.neg_pred != 0) { + pred = v.ir.LogicalNot(pred); + } + const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, iset.bop)}; + + const IR::U32 one_mask{v.ir.Imm32(-1)}; + const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; + const IR::U32 pass_result{iset.bf == 0 ? one_mask : fp_one}; + const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)}; + + v.X(iset.dest_reg, result); + if (iset.cc != 0) { + if (x) { + throw NotImplementedException("ISET.CC + X"); + } + const IR::U1 is_zero{v.ir.IEqual(result, zero)}; + v.SetZFlag(is_zero); + if (iset.bf != 0) { + v.ResetSFlag(); + } else { + v.SetSFlag(v.ir.LogicalNot(is_zero)); + } + v.ResetCFlag(); + v.ResetOFlag(); + } +} +} // Anonymous namespace + +void TranslatorVisitor::ISET_reg(u64 insn) { + ISET(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::ISET_cbuf(u64 insn) { + ISET(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::ISET_imm(u64 insn) { + ISET(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp new file mode 100644 index 000000000..0b8119ddd --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp @@ -0,0 +1,182 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
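+// I2F: converts an integer, or a selected 8/16-bit slice of it, to a 16, 32 or 64-bit float,
+// honouring .ABS, .NEG and the encoded rounding mode.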
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class FloatFormat : u64 { + F16 = 1, + F32 = 2, + F64 = 3, +}; + +enum class IntFormat : u64 { + U8 = 0, + U16 = 1, + U32 = 2, + U64 = 3, +}; + +union Encoding { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 2, FloatFormat> float_format; + BitField<10, 2, IntFormat> int_format; + BitField<13, 1, u64> is_signed; + BitField<39, 2, FpRounding> fp_rounding; + BitField<41, 2, u64> selector; + BitField<47, 1, u64> cc; + BitField<45, 1, u64> neg; + BitField<49, 1, u64> abs; +}; + +bool Is64(u64 insn) { + return Encoding{insn}.int_format == IntFormat::U64; +} + +int BitSize(FloatFormat format) { + switch (format) { + case FloatFormat::F16: + return 16; + case FloatFormat::F32: + return 32; + case FloatFormat::F64: + return 64; + } + throw NotImplementedException("Invalid float format {}", format); +} + +IR::U32 SmallAbs(TranslatorVisitor& v, const IR::U32& value, int bitsize) { + const IR::U32 least_value{v.ir.Imm32(-(1 << (bitsize - 1)))}; + const IR::U32 mask{v.ir.ShiftRightArithmetic(value, v.ir.Imm32(bitsize - 1))}; + const IR::U32 absolute{v.ir.BitwiseXor(v.ir.IAdd(value, mask), mask)}; + const IR::U1 is_least{v.ir.IEqual(value, least_value)}; + return IR::U32{v.ir.Select(is_least, value, absolute)}; +} + +void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) { + const Encoding i2f{insn}; + if (i2f.cc != 0) { + throw NotImplementedException("I2F CC"); + } + const bool is_signed{i2f.is_signed != 0}; + int src_bitsize{}; + switch (i2f.int_format) { + case IntFormat::U8: + src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8), + v.ir.Imm32(8), is_signed); + if (i2f.abs != 0) { + src = SmallAbs(v, src, 8); + } + src_bitsize = 8; + break; + case IntFormat::U16: + if (i2f.selector == 1 || i2f.selector == 3) { + throw NotImplementedException("Invalid U16 selector {}", i2f.selector.Value()); + } + src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8), + v.ir.Imm32(16), is_signed); + if (i2f.abs != 0) { + src = SmallAbs(v, src, 16); + } + src_bitsize = 16; + break; + case IntFormat::U32: + case IntFormat::U64: + if (i2f.selector != 0) { + throw NotImplementedException("Unexpected selector {}", i2f.selector.Value()); + } + if (i2f.abs != 0 && is_signed) { + src = v.ir.IAbs(src); + } + src_bitsize = i2f.int_format == IntFormat::U64 ? 64 : 32; + break; + } + const int conversion_src_bitsize{i2f.int_format == IntFormat::U64 ? 
64 : 32}; + const int dst_bitsize{BitSize(i2f.float_format)}; + const IR::FpControl fp_control{ + .no_contraction = false, + .rounding = CastFpRounding(i2f.fp_rounding), + .fmz_mode = IR::FmzMode::DontCare, + }; + auto value{v.ir.ConvertIToF(static_cast<size_t>(dst_bitsize), + static_cast<size_t>(conversion_src_bitsize), is_signed, src, + fp_control)}; + if (i2f.neg != 0) { + if (i2f.abs != 0 || !is_signed) { + // We know the value is positive + value = v.ir.FPNeg(value); + } else { + // Only negate if the input isn't the lowest value + IR::U1 is_least; + if (src_bitsize == 64) { + is_least = v.ir.IEqual(src, v.ir.Imm64(std::numeric_limits<s64>::min())); + } else if (src_bitsize == 32) { + is_least = v.ir.IEqual(src, v.ir.Imm32(std::numeric_limits<s32>::min())); + } else { + const IR::U32 least_value{v.ir.Imm32(-(1 << (src_bitsize - 1)))}; + is_least = v.ir.IEqual(src, least_value); + } + value = IR::F16F32F64{v.ir.Select(is_least, value, v.ir.FPNeg(value))}; + } + } + switch (i2f.float_format) { + case FloatFormat::F16: { + const IR::F16 zero{v.ir.FPConvert(16, v.ir.Imm32(0.0f))}; + v.X(i2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(value, zero))); + break; + } + case FloatFormat::F32: + v.F(i2f.dest_reg, value); + break; + case FloatFormat::F64: { + if (!IR::IsAligned(i2f.dest_reg, 2)) { + throw NotImplementedException("Unaligned destination {}", i2f.dest_reg.Value()); + } + const IR::Value vector{v.ir.UnpackDouble2x32(value)}; + for (int i = 0; i < 2; ++i) { + v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, static_cast<size_t>(i))}); + } + break; + } + default: + throw NotImplementedException("Invalid float format {}", i2f.float_format.Value()); + } +} +} // Anonymous namespace + +void TranslatorVisitor::I2F_reg(u64 insn) { + if (Is64(insn)) { + union { + u64 raw; + BitField<20, 8, IR::Reg> reg; + } const value{insn}; + const IR::Value regs{ir.CompositeConstruct(ir.GetReg(value.reg), ir.GetReg(value.reg + 1))}; + I2F(*this, insn, ir.PackUint2x32(regs)); + } else { + I2F(*this, insn, GetReg20(insn)); + } +} + +void TranslatorVisitor::I2F_cbuf(u64 insn) { + if (Is64(insn)) { + I2F(*this, insn, GetPackedCbuf(insn)); + } else { + I2F(*this, insn, GetCbuf(insn)); + } +} + +void TranslatorVisitor::I2F_imm(u64 insn) { + if (Is64(insn)) { + I2F(*this, insn, GetPackedImm20(insn)); + } else { + I2F(*this, insn, GetImm20(insn)); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp new file mode 100644 index 000000000..5feefc0ce --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp @@ -0,0 +1,82 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
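+// SHF: funnel shift. The low and high 32-bit sources are packed into a 64-bit value and shifted by
+// a wrapped or clamped amount; the low word (right shifts) or high word (left shifts) is written.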
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class MaxShift : u64 { + U32, + Undefined, + U64, + S64, +}; + +IR::U64 PackedShift(IR::IREmitter& ir, const IR::U64& packed_int, const IR::U32& safe_shift, + bool right_shift, bool is_signed) { + if (!right_shift) { + return ir.ShiftLeftLogical(packed_int, safe_shift); + } + if (is_signed) { + return ir.ShiftRightArithmetic(packed_int, safe_shift); + } + return ir.ShiftRightLogical(packed_int, safe_shift); +} + +void SHF(TranslatorVisitor& v, u64 insn, const IR::U32& shift, const IR::U32& high_bits, + bool right_shift) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<0, 8, IR::Reg> lo_bits_reg; + BitField<37, 2, MaxShift> max_shift; + BitField<47, 1, u64> cc; + BitField<48, 2, u64> x_mode; + BitField<50, 1, u64> wrap; + } const shf{insn}; + + if (shf.cc != 0) { + throw NotImplementedException("SHF CC"); + } + if (shf.x_mode != 0) { + throw NotImplementedException("SHF X Mode"); + } + if (shf.max_shift == MaxShift::Undefined) { + throw NotImplementedException("SHF Use of undefined MaxShift value"); + } + const IR::U32 low_bits{v.X(shf.lo_bits_reg)}; + const IR::U64 packed_int{v.ir.PackUint2x32(v.ir.CompositeConstruct(low_bits, high_bits))}; + const IR::U32 max_shift{shf.max_shift == MaxShift::U32 ? v.ir.Imm32(32) : v.ir.Imm32(63)}; + const IR::U32 safe_shift{shf.wrap != 0 + ? v.ir.BitwiseAnd(shift, v.ir.ISub(max_shift, v.ir.Imm32(1))) + : v.ir.UMin(shift, max_shift)}; + + const bool is_signed{shf.max_shift == MaxShift::S64}; + const IR::U64 shifted_value{PackedShift(v.ir, packed_int, safe_shift, right_shift, is_signed)}; + const IR::Value unpacked_value{v.ir.UnpackUint2x32(shifted_value)}; + + const IR::U32 result{v.ir.CompositeExtract(unpacked_value, right_shift ? 0 : 1)}; + v.X(shf.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::SHF_l_reg(u64 insn) { + SHF(*this, insn, GetReg20(insn), GetReg39(insn), false); +} + +void TranslatorVisitor::SHF_l_imm(u64 insn) { + SHF(*this, insn, GetImm20(insn), GetReg39(insn), false); +} + +void TranslatorVisitor::SHF_r_reg(u64 insn) { + SHF(*this, insn, GetReg20(insn), GetReg39(insn), true); +} + +void TranslatorVisitor::SHF_r_imm(u64 insn) { + SHF(*this, insn, GetImm20(insn), GetReg39(insn), true); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp new file mode 100644 index 000000000..1badbacc4 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp @@ -0,0 +1,64 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
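+// IMNMX: computes both the signed or unsigned minimum and maximum of the operands and selects
+// which one to write based on the source predicate.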
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void IMNMX(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 2, u64> mode; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> is_signed; + } const imnmx{insn}; + + if (imnmx.cc != 0) { + throw NotImplementedException("IMNMX CC"); + } + + if (imnmx.mode != 0) { + throw NotImplementedException("IMNMX.MODE"); + } + + const IR::U1 pred{v.ir.GetPred(imnmx.pred)}; + const IR::U32 op_a{v.X(imnmx.src_reg)}; + IR::U32 min; + IR::U32 max; + + if (imnmx.is_signed != 0) { + min = IR::U32{v.ir.SMin(op_a, op_b)}; + max = IR::U32{v.ir.SMax(op_a, op_b)}; + } else { + min = IR::U32{v.ir.UMin(op_a, op_b)}; + max = IR::U32{v.ir.UMax(op_a, op_b)}; + } + if (imnmx.neg_pred != 0) { + std::swap(min, max); + } + + const IR::U32 result{v.ir.Select(pred, min, max)}; + v.X(imnmx.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::IMNMX_reg(u64 insn) { + IMNMX(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::IMNMX_cbuf(u64 insn) { + IMNMX(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::IMNMX_imm(u64 insn) { + IMNMX(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp new file mode 100644 index 000000000..5ece7678d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp @@ -0,0 +1,36 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void POPC(TranslatorVisitor& v, u64 insn, const IR::U32& src) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<40, 1, u64> tilde; + } const popc{insn}; + + const IR::U32 operand = popc.tilde == 0 ? src : v.ir.BitwiseNot(src); + const IR::U32 result = v.ir.BitCount(operand); + v.X(popc.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::POPC_reg(u64 insn) { + POPC(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::POPC_cbuf(u64 insn) { + POPC(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::POPC_imm(u64 insn) { + POPC(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp new file mode 100644 index 000000000..044671943 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp @@ -0,0 +1,86 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
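+// ISCADD: shifts operand A left by an immediate scale and adds operand B. Both negation bits set
+// together encode .PO, which adds one instead of negating either operand.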
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool cc, bool neg_a, bool neg_b, + u64 scale_imm) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> op_a; + } const iscadd{insn}; + + const bool po{neg_a && neg_b}; + IR::U32 op_a{v.X(iscadd.op_a)}; + if (po) { + // When PO is present, add one + op_b = v.ir.IAdd(op_b, v.ir.Imm32(1)); + } else { + // When PO is not present, the bits are interpreted as negation + if (neg_a) { + op_a = v.ir.INeg(op_a); + } + if (neg_b) { + op_b = v.ir.INeg(op_b); + } + } + // With the operands already processed, scale A + const IR::U32 scale{v.ir.Imm32(static_cast<u32>(scale_imm))}; + const IR::U32 scaled_a{v.ir.ShiftLeftLogical(op_a, scale)}; + + const IR::U32 result{v.ir.IAdd(scaled_a, op_b)}; + v.X(iscadd.dest_reg, result); + + if (cc) { + v.SetZFlag(v.ir.GetZeroFromOp(result)); + v.SetSFlag(v.ir.GetSignFromOp(result)); + const IR::U1 carry{v.ir.GetCarryFromOp(result)}; + const IR::U1 overflow{v.ir.GetOverflowFromOp(result)}; + v.SetCFlag(po ? v.ir.LogicalOr(carry, v.ir.GetCarryFromOp(op_b)) : carry); + v.SetOFlag(po ? v.ir.LogicalOr(overflow, v.ir.GetOverflowFromOp(op_b)) : overflow); + } +} + +void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { + union { + u64 raw; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_b; + BitField<49, 1, u64> neg_a; + BitField<39, 5, u64> scale; + } const iscadd{insn}; + + ISCADD(v, insn, op_b, iscadd.cc != 0, iscadd.neg_a != 0, iscadd.neg_b != 0, iscadd.scale); +} + +} // Anonymous namespace + +void TranslatorVisitor::ISCADD_reg(u64 insn) { + ISCADD(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::ISCADD_cbuf(u64 insn) { + ISCADD(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::ISCADD_imm(u64 insn) { + ISCADD(*this, insn, GetImm20(insn)); +} + +void TranslatorVisitor::ISCADD32I(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> cc; + BitField<53, 5, u64> scale; + } const iscadd{insn}; + + return ISCADD(*this, insn, GetImm32(insn), iscadd.cc != 0, false, false, iscadd.scale); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp new file mode 100644 index 000000000..bee10e5b9 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp @@ -0,0 +1,58 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +IR::U1 IsetpCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, + CompareOp compare_op, bool is_signed, bool x) { + return x ? 
ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed) + : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed); +} + +void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { + union { + u64 raw; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<43, 1, u64> x; + BitField<45, 2, BooleanOp> bop; + BitField<48, 1, u64> is_signed; + BitField<49, 3, CompareOp> compare_op; + } const isetp{insn}; + + const bool is_signed{isetp.is_signed != 0}; + const bool x{isetp.x != 0}; + const BooleanOp bop{isetp.bop}; + const CompareOp compare_op{isetp.compare_op}; + const IR::U32 op_a{v.X(isetp.src_reg_a)}; + const IR::U1 comparison{IsetpCompare(v.ir, op_a, op_b, compare_op, is_signed, x)}; + const IR::U1 bop_pred{v.ir.GetPred(isetp.bop_pred, isetp.neg_bop_pred != 0)}; + const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; + const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; + v.ir.SetPred(isetp.dest_pred_a, result_a); + v.ir.SetPred(isetp.dest_pred_b, result_b); +} +} // Anonymous namespace + +void TranslatorVisitor::ISETP_reg(u64 insn) { + ISETP(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::ISETP_cbuf(u64 insn) { + ISETP(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::ISETP_imm(u64 insn) { + ISETP(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp new file mode 100644 index 000000000..20af68852 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp @@ -0,0 +1,71 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<39, 1, u64> w; + BitField<43, 1, u64> x; + BitField<47, 1, u64> cc; + } const shl{insn}; + + if (shl.x != 0) { + throw NotImplementedException("SHL.X"); + } + if (shl.cc != 0) { + throw NotImplementedException("SHL.CC"); + } + const IR::U32 base{v.X(shl.src_reg_a)}; + IR::U32 result; + if (shl.w != 0) { + // When .W is set, the shift value is wrapped + // To emulate this we just have to wrap it ourselves. + const IR::U32 shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))}; + result = v.ir.ShiftLeftLogical(base, shift); + } else { + // When .W is not set, the shift value is clamped between 0 and 32. + // To emulate this we have to have in mind the special shift of 32, that evaluates as 0. + // We can safely evaluate an out of bounds shift according to the SPIR-V specification: + // + // https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpShiftLeftLogical + // "Shift is treated as unsigned. The resulting value is undefined if Shift is greater than + // or equal to the bit width of the components of Base." 
+ // + // And on the GLASM specification it is also safe to evaluate out of bounds: + // + // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_gpu_program4.txt + // "The results of a shift operation ("<<") are undefined if the value of the second operand + // is negative, or greater than or equal to the number of bits in the first operand." + // + // Emphasis on undefined results in contrast to undefined behavior. + // + const IR::U1 is_safe{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)}; + const IR::U32 unsafe_result{v.ir.ShiftLeftLogical(base, unsafe_shift)}; + result = IR::U32{v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0))}; + } + v.X(shl.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::SHL_reg(u64 insn) { + SHL(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::SHL_cbuf(u64 insn) { + SHL(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::SHL_imm(u64 insn) { + SHL(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp new file mode 100644 index 000000000..be00bb605 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp @@ -0,0 +1,66 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void SHR(TranslatorVisitor& v, u64 insn, const IR::U32& shift) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<39, 1, u64> is_wrapped; + BitField<40, 1, u64> brev; + BitField<43, 1, u64> xmode; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> is_signed; + } const shr{insn}; + + if (shr.xmode != 0) { + throw NotImplementedException("SHR.XMODE"); + } + if (shr.cc != 0) { + throw NotImplementedException("SHR.CC"); + } + + IR::U32 base{v.X(shr.src_reg_a)}; + if (shr.brev == 1) { + base = v.ir.BitReverse(base); + } + IR::U32 result; + const IR::U32 safe_shift = shr.is_wrapped == 0 ? 
shift : v.ir.BitwiseAnd(shift, v.ir.Imm32(31)); + if (shr.is_signed == 1) { + result = IR::U32{v.ir.ShiftRightArithmetic(base, safe_shift)}; + } else { + result = IR::U32{v.ir.ShiftRightLogical(base, safe_shift)}; + } + + if (shr.is_wrapped == 0) { + const IR::U32 zero{v.ir.Imm32(0)}; + const IR::U32 safe_bits{v.ir.Imm32(32)}; + + const IR::U1 is_negative{v.ir.ILessThan(result, zero, true)}; + const IR::U1 is_safe{v.ir.ILessThan(shift, safe_bits, false)}; + const IR::U32 clamped_value{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)}; + result = IR::U32{v.ir.Select(is_safe, result, clamped_value)}; + } + v.X(shr.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::SHR_reg(u64 insn) { + SHR(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::SHR_cbuf(u64 insn) { + SHR(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::SHR_imm(u64 insn) { + SHR(*this, insn, GetImm20(insn)); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp new file mode 100644 index 000000000..2932cdc42 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp @@ -0,0 +1,135 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class SelectMode : u64 { + Default, + CLO, + CHI, + CSFU, + CBCC, +}; + +enum class Half : u64 { + H0, // Least-significant bits (15:0) + H1, // Most-significant bits (31:16) +}; + +IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, Half half, bool is_signed) { + const IR::U32 offset{v.ir.Imm32(half == Half::H1 ? 16 : 0)}; + return v.ir.BitFieldExtract(src, offset, v.ir.Imm32(16), is_signed); +} + +void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c, + SelectMode select_mode, Half half_b, bool psl, bool mrg, bool x) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> is_a_signed; + BitField<49, 1, u64> is_b_signed; + BitField<53, 1, Half> half_a; + } const xmad{insn}; + + if (x) { + throw NotImplementedException("XMAD X"); + } + const IR::U32 op_a{ExtractHalf(v, v.X(xmad.src_reg_a), xmad.half_a, xmad.is_a_signed != 0)}; + const IR::U32 op_b{ExtractHalf(v, src_b, half_b, xmad.is_b_signed != 0)}; + + IR::U32 product{v.ir.IMul(op_a, op_b)}; + if (psl) { + // .PSL shifts the product 16 bits + product = v.ir.ShiftLeftLogical(product, v.ir.Imm32(16)); + } + const IR::U32 op_c{[&]() -> IR::U32 { + switch (select_mode) { + case SelectMode::Default: + return src_c; + case SelectMode::CLO: + return ExtractHalf(v, src_c, Half::H0, false); + case SelectMode::CHI: + return ExtractHalf(v, src_c, Half::H1, false); + case SelectMode::CBCC: + return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_c); + case SelectMode::CSFU: + throw NotImplementedException("XMAD CSFU"); + } + throw NotImplementedException("Invalid XMAD select mode {}", select_mode); + }()}; + IR::U32 result{v.ir.IAdd(product, op_c)}; + if (mrg) { + // .MRG inserts src_b [15:0] into result's [31:16]. 
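+        // i.e. result = ((src_b & 0xffff) << 16) | (result & 0xffff)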
+ const IR::U32 lsb_b{ExtractHalf(v, src_b, Half::H0, false)}; + result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16)); + } + if (xmad.cc) { + throw NotImplementedException("XMAD CC"); + } + // Store result + v.X(xmad.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::XMAD_reg(u64 insn) { + union { + u64 raw; + BitField<35, 1, Half> half_b; + BitField<36, 1, u64> psl; + BitField<37, 1, u64> mrg; + BitField<38, 1, u64> x; + BitField<50, 3, SelectMode> select_mode; + } const xmad{insn}; + + XMAD(*this, insn, GetReg20(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0, + xmad.mrg != 0, xmad.x != 0); +} + +void TranslatorVisitor::XMAD_rc(u64 insn) { + union { + u64 raw; + BitField<50, 2, SelectMode> select_mode; + BitField<52, 1, Half> half_b; + BitField<54, 1, u64> x; + } const xmad{insn}; + + XMAD(*this, insn, GetReg39(insn), GetCbuf(insn), xmad.select_mode, xmad.half_b, false, false, + xmad.x != 0); +} + +void TranslatorVisitor::XMAD_cr(u64 insn) { + union { + u64 raw; + BitField<50, 2, SelectMode> select_mode; + BitField<52, 1, Half> half_b; + BitField<54, 1, u64> x; + BitField<55, 1, u64> psl; + BitField<56, 1, u64> mrg; + } const xmad{insn}; + + XMAD(*this, insn, GetCbuf(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0, + xmad.mrg != 0, xmad.x != 0); +} + +void TranslatorVisitor::XMAD_imm(u64 insn) { + union { + u64 raw; + BitField<20, 16, u64> src_b; + BitField<36, 1, u64> psl; + BitField<37, 1, u64> mrg; + BitField<38, 1, u64> x; + BitField<50, 3, SelectMode> select_mode; + } const xmad{insn}; + + XMAD(*this, insn, ir.Imm32(static_cast<u32>(xmad.src_b)), GetReg39(insn), xmad.select_mode, + Half::H0, xmad.psl != 0, xmad.mrg != 0, xmad.x != 0); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp new file mode 100644 index 000000000..53e8d8923 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp @@ -0,0 +1,126 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class IntegerWidth : u64 { + Byte, + Short, + Word, +}; + +[[nodiscard]] IR::U32 WidthSize(IR::IREmitter& ir, IntegerWidth width) { + switch (width) { + case IntegerWidth::Byte: + return ir.Imm32(8); + case IntegerWidth::Short: + return ir.Imm32(16); + case IntegerWidth::Word: + return ir.Imm32(32); + default: + throw NotImplementedException("Invalid width {}", width); + } +} + +[[nodiscard]] IR::U32 ConvertInteger(IR::IREmitter& ir, const IR::U32& src, + IntegerWidth dst_width) { + const IR::U32 zero{ir.Imm32(0)}; + const IR::U32 count{WidthSize(ir, dst_width)}; + return ir.BitFieldExtract(src, zero, count, false); +} + +[[nodiscard]] IR::U32 SaturateInteger(IR::IREmitter& ir, const IR::U32& src, IntegerWidth dst_width, + bool dst_signed, bool src_signed) { + IR::U32 min{}; + IR::U32 max{}; + const IR::U32 zero{ir.Imm32(0)}; + switch (dst_width) { + case IntegerWidth::Byte: + min = dst_signed && src_signed ? ir.Imm32(0xffffff80) : zero; + max = dst_signed ? ir.Imm32(0x7f) : ir.Imm32(0xff); + break; + case IntegerWidth::Short: + min = dst_signed && src_signed ? 
ir.Imm32(0xffff8000) : zero; + max = dst_signed ? ir.Imm32(0x7fff) : ir.Imm32(0xffff); + break; + case IntegerWidth::Word: + min = dst_signed && src_signed ? ir.Imm32(0x80000000) : zero; + max = dst_signed ? ir.Imm32(0x7fffffff) : ir.Imm32(0xffffffff); + break; + default: + throw NotImplementedException("Invalid width {}", dst_width); + } + const IR::U32 value{!dst_signed && src_signed ? ir.SMax(zero, src) : src}; + return dst_signed && src_signed ? ir.SClamp(value, min, max) : ir.UClamp(value, min, max); +} + +void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 2, IntegerWidth> dst_fmt; + BitField<12, 1, u64> dst_fmt_sign; + BitField<10, 2, IntegerWidth> src_fmt; + BitField<13, 1, u64> src_fmt_sign; + BitField<41, 3, u64> selector; + BitField<45, 1, u64> neg; + BitField<47, 1, u64> cc; + BitField<49, 1, u64> abs; + BitField<50, 1, u64> sat; + } const i2i{insn}; + + if (i2i.src_fmt == IntegerWidth::Short && (i2i.selector == 1 || i2i.selector == 3)) { + throw NotImplementedException("16-bit source format incompatible with selector {}", + i2i.selector); + } + if (i2i.src_fmt == IntegerWidth::Word && i2i.selector != 0) { + throw NotImplementedException("32-bit source format incompatible with selector {}", + i2i.selector); + } + + const s32 selector{static_cast<s32>(i2i.selector)}; + const IR::U32 offset{v.ir.Imm32(selector * 8)}; + const IR::U32 count{WidthSize(v.ir, i2i.src_fmt)}; + const bool src_signed{i2i.src_fmt_sign != 0}; + const bool dst_signed{i2i.dst_fmt_sign != 0}; + const bool sat{i2i.sat != 0}; + + IR::U32 src_values{v.ir.BitFieldExtract(src_a, offset, count, src_signed)}; + if (i2i.abs != 0) { + src_values = v.ir.IAbs(src_values); + } + if (i2i.neg != 0) { + src_values = v.ir.INeg(src_values); + } + const IR::U32 result{ + sat ? SaturateInteger(v.ir, src_values, i2i.dst_fmt, dst_signed, src_signed) + : ConvertInteger(v.ir, src_values, i2i.dst_fmt)}; + + v.X(i2i.dest_reg, result); + if (i2i.cc != 0) { + v.SetZFlag(v.ir.GetZeroFromOp(result)); + v.SetSFlag(v.ir.GetSignFromOp(result)); + v.ResetCFlag(); + v.ResetOFlag(); + } +} +} // Anonymous namespace + +void TranslatorVisitor::I2I_reg(u64 insn) { + I2I(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::I2I_cbuf(u64 insn) { + I2I(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::I2I_imm(u64 insn) { + I2I(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp new file mode 100644 index 000000000..9b85f8059 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp @@ -0,0 +1,53 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
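+// ISBERD: internal stage buffer entry read. Only the default mode is accepted and the instruction
+// is currently stubbed as a register-to-register move.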
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + Default, + Patch, + Prim, + Attr, +}; + +enum class Shift : u64 { + Default, + U16, + B32, +}; + +} // Anonymous namespace + +void TranslatorVisitor::ISBERD(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<31, 1, u64> skew; + BitField<32, 1, u64> o; + BitField<33, 2, Mode> mode; + BitField<47, 2, Shift> shift; + } const isberd{insn}; + + if (isberd.skew != 0) { + throw NotImplementedException("SKEW"); + } + if (isberd.o != 0) { + throw NotImplementedException("O"); + } + if (isberd.mode != Mode::Default) { + throw NotImplementedException("Mode {}", isberd.mode.Value()); + } + if (isberd.shift != Shift::Default) { + throw NotImplementedException("Shift {}", isberd.shift.Value()); + } + LOG_WARNING(Shader, "(STUBBED) called"); + X(isberd.dest_reg, X(isberd.src_reg)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp new file mode 100644 index 000000000..2300088e3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp @@ -0,0 +1,62 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h" + +namespace Shader::Maxwell { +using namespace LDC; +namespace { +std::pair<IR::U32, IR::U32> Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index, + const IR::U32& reg, const IR::U32& imm) { + switch (mode) { + case Mode::Default: + return {imm_index, ir.IAdd(reg, imm)}; + default: + break; + } + throw NotImplementedException("Mode {}", mode); +} +} // Anonymous namespace + +void TranslatorVisitor::LDC(u64 insn) { + const Encoding ldc{insn}; + const IR::U32 imm_index{ir.Imm32(static_cast<u32>(ldc.index))}; + const IR::U32 reg{X(ldc.src_reg)}; + const IR::U32 imm{ir.Imm32(static_cast<s32>(ldc.offset))}; + const auto [index, offset]{Slot(ir, ldc.mode, imm_index, reg, imm)}; + switch (ldc.size) { + case Size::U8: + X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, false)}); + break; + case Size::S8: + X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, true)}); + break; + case Size::U16: + X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, false)}); + break; + case Size::S16: + X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, true)}); + break; + case Size::B32: + X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 32, false)}); + break; + case Size::B64: { + if (!IR::IsAligned(ldc.dest_reg, 2)) { + throw NotImplementedException("Unaligned destination register"); + } + const IR::Value vector{ir.GetCbuf(index, offset, 64, false)}; + for (int i = 0; i < 2; ++i) { + X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))}); + } + break; + } + default: + throw NotImplementedException("Invalid size {}", ldc.size.Value()); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h new file mode 100644 index 
000000000..3074ea0e3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h @@ -0,0 +1,39 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/reg.h" + +namespace Shader::Maxwell::LDC { + +enum class Mode : u64 { + Default, + IL, + IS, + ISL, +}; + +enum class Size : u64 { + U8, + S8, + U16, + S16, + B32, + B64, +}; + +union Encoding { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<20, 16, s64> offset; + BitField<36, 5, u64> index; + BitField<44, 2, Mode> mode; + BitField<48, 3, Size> size; +}; + +} // namespace Shader::Maxwell::LDC diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp new file mode 100644 index 000000000..4a0f04e47 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp @@ -0,0 +1,108 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void LEA_hi(TranslatorVisitor& v, u64 insn, const IR::U32& base, IR::U32 offset_hi, u64 scale, + bool neg, bool x) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> offset_lo_reg; + BitField<47, 1, u64> cc; + BitField<48, 3, IR::Pred> pred; + } const lea{insn}; + + if (x) { + throw NotImplementedException("LEA.HI X"); + } + if (lea.pred != IR::Pred::PT) { + throw NotImplementedException("LEA.HI Pred"); + } + if (lea.cc != 0) { + throw NotImplementedException("LEA.HI CC"); + } + + const IR::U32 offset_lo{v.X(lea.offset_lo_reg)}; + const IR::U64 packed_offset{v.ir.PackUint2x32(v.ir.CompositeConstruct(offset_lo, offset_hi))}; + const IR::U64 offset{neg ? IR::U64{v.ir.INeg(packed_offset)} : packed_offset}; + + const s32 hi_scale{32 - static_cast<s32>(scale)}; + const IR::U64 scaled_offset{v.ir.ShiftRightLogical(offset, v.ir.Imm32(hi_scale))}; + const IR::U32 scaled_offset_w0{v.ir.CompositeExtract(v.ir.UnpackUint2x32(scaled_offset), 0)}; + + IR::U32 result{v.ir.IAdd(base, scaled_offset_w0)}; + v.X(lea.dest_reg, result); +} + +void LEA_lo(TranslatorVisitor& v, u64 insn, const IR::U32& base) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> offset_lo_reg; + BitField<39, 5, u64> scale; + BitField<45, 1, u64> neg; + BitField<46, 1, u64> x; + BitField<47, 1, u64> cc; + BitField<48, 3, IR::Pred> pred; + } const lea{insn}; + if (lea.x != 0) { + throw NotImplementedException("LEA.LO X"); + } + if (lea.pred != IR::Pred::PT) { + throw NotImplementedException("LEA.LO Pred"); + } + if (lea.cc != 0) { + throw NotImplementedException("LEA.LO CC"); + } + + const IR::U32 offset_lo{v.X(lea.offset_lo_reg)}; + const s32 scale{static_cast<s32>(lea.scale)}; + const IR::U32 offset{lea.neg != 0 ? 
IR::U32{v.ir.INeg(offset_lo)} : offset_lo}; + const IR::U32 scaled_offset{v.ir.ShiftLeftLogical(offset, v.ir.Imm32(scale))}; + + IR::U32 result{v.ir.IAdd(base, scaled_offset)}; + v.X(lea.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::LEA_hi_reg(u64 insn) { + union { + u64 insn; + BitField<28, 5, u64> scale; + BitField<37, 1, u64> neg; + BitField<38, 1, u64> x; + } const lea{insn}; + + LEA_hi(*this, insn, GetReg20(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0); +} + +void TranslatorVisitor::LEA_hi_cbuf(u64 insn) { + union { + u64 insn; + BitField<51, 5, u64> scale; + BitField<56, 1, u64> neg; + BitField<57, 1, u64> x; + } const lea{insn}; + + LEA_hi(*this, insn, GetCbuf(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0); +} + +void TranslatorVisitor::LEA_lo_reg(u64 insn) { + LEA_lo(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::LEA_lo_cbuf(u64 insn) { + LEA_lo(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::LEA_lo_imm(u64 insn) { + LEA_lo(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp new file mode 100644 index 000000000..924fb7a40 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp @@ -0,0 +1,196 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Size : u64 { + B32, + B64, + B96, + B128, +}; + +enum class InterpolationMode : u64 { + Pass, + Multiply, + Constant, + Sc, +}; + +enum class SampleMode : u64 { + Default, + Centroid, + Offset, +}; + +u32 NumElements(Size size) { + switch (size) { + case Size::B32: + return 1; + case Size::B64: + return 2; + case Size::B96: + return 3; + case Size::B128: + return 4; + } + throw InvalidArgument("Invalid size {}", size); +} + +template <typename F> +void HandleIndexed(TranslatorVisitor& v, IR::Reg index_reg, u32 num_elements, F&& f) { + const IR::U32 index_value{v.X(index_reg)}; + for (u32 element = 0; element < num_elements; ++element) { + const IR::U32 final_offset{ + element == 0 ? 
index_value : IR::U32{v.ir.IAdd(index_value, v.ir.Imm32(element * 4U))}}; + f(element, final_offset); + } +} + +} // Anonymous namespace + +void TranslatorVisitor::ALD(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> index_reg; + BitField<20, 10, u64> absolute_offset; + BitField<20, 11, s64> relative_offset; + BitField<39, 8, IR::Reg> vertex_reg; + BitField<32, 1, u64> o; + BitField<31, 1, u64> patch; + BitField<47, 2, Size> size; + } const ald{insn}; + + const u64 offset{ald.absolute_offset.Value()}; + if (offset % 4 != 0) { + throw NotImplementedException("Unaligned absolute offset {}", offset); + } + const IR::U32 vertex{X(ald.vertex_reg)}; + const u32 num_elements{NumElements(ald.size)}; + if (ald.index_reg == IR::Reg::RZ) { + for (u32 element = 0; element < num_elements; ++element) { + if (ald.patch != 0) { + const IR::Patch patch{offset / 4 + element}; + F(ald.dest_reg + static_cast<int>(element), ir.GetPatch(patch)); + } else { + const IR::Attribute attr{offset / 4 + element}; + F(ald.dest_reg + static_cast<int>(element), ir.GetAttribute(attr, vertex)); + } + } + return; + } + if (ald.patch != 0) { + throw NotImplementedException("Indirect patch read"); + } + HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { + F(ald.dest_reg + static_cast<int>(element), ir.GetAttributeIndexed(final_offset, vertex)); + }); +} + +void TranslatorVisitor::AST(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> src_reg; + BitField<8, 8, IR::Reg> index_reg; + BitField<20, 10, u64> absolute_offset; + BitField<20, 11, s64> relative_offset; + BitField<31, 1, u64> patch; + BitField<39, 8, IR::Reg> vertex_reg; + BitField<47, 2, Size> size; + } const ast{insn}; + + if (ast.index_reg != IR::Reg::RZ) { + throw NotImplementedException("Indexed store"); + } + const u64 offset{ast.absolute_offset.Value()}; + if (offset % 4 != 0) { + throw NotImplementedException("Unaligned absolute offset {}", offset); + } + const IR::U32 vertex{X(ast.vertex_reg)}; + const u32 num_elements{NumElements(ast.size)}; + if (ast.index_reg == IR::Reg::RZ) { + for (u32 element = 0; element < num_elements; ++element) { + if (ast.patch != 0) { + const IR::Patch patch{offset / 4 + element}; + ir.SetPatch(patch, F(ast.src_reg + static_cast<int>(element))); + } else { + const IR::Attribute attr{offset / 4 + element}; + ir.SetAttribute(attr, F(ast.src_reg + static_cast<int>(element)), vertex); + } + } + return; + } + if (ast.patch != 0) { + throw NotImplementedException("Indexed tessellation patch store"); + } + HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { + ir.SetAttributeIndexed(final_offset, F(ast.src_reg + static_cast<int>(element)), vertex); + }); +} + +void TranslatorVisitor::IPA(u64 insn) { + // IPA is the instruction used to read varyings from a fragment shader. + // gl_FragCoord is mapped to the gl_Position attribute. + // It yields unknown results when used outside of the fragment shader stage. + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> index_reg; + BitField<20, 8, IR::Reg> multiplier; + BitField<30, 8, IR::Attribute> attribute; + BitField<38, 1, u64> idx; + BitField<51, 1, u64> sat; + BitField<52, 2, SampleMode> sample_mode; + BitField<54, 2, InterpolationMode> interpolation_mode; + } const ipa{insn}; + + // Indexed IPAs are used for indexed varyings. 
+ // For example: + // + // in vec4 colors[4]; + // uniform int idx; + // void main() { + // gl_FragColor = colors[idx]; + // } + const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ}; + const IR::Attribute attribute{ipa.attribute}; + IR::F32 value{is_indexed ? ir.GetAttributeIndexed(X(ipa.index_reg)) + : ir.GetAttribute(attribute)}; + if (IR::IsGeneric(attribute)) { + const ProgramHeader& sph{env.SPH()}; + const u32 attr_index{IR::GenericAttributeIndex(attribute)}; + const u32 element{static_cast<u32>(attribute) % 4}; + const std::array input_map{sph.ps.GenericInputMap(attr_index)}; + const bool is_perspective{input_map[element] == Shader::PixelImap::Perspective}; + if (is_perspective) { + const IR::F32 position_w{ir.GetAttribute(IR::Attribute::PositionW)}; + value = ir.FPMul(value, position_w); + } + } + if (ipa.interpolation_mode == InterpolationMode::Multiply) { + value = ir.FPMul(value, F(ipa.multiplier)); + } + + // Saturated IPAs are generally generated out of clamped varyings. + // For example: clamp(some_varying, 0.0, 1.0) + const bool is_saturated{ipa.sat != 0}; + if (is_saturated) { + if (attribute == IR::Attribute::FrontFace) { + throw NotImplementedException("IPA.SAT on FrontFace"); + } + value = ir.FPSaturate(value); + } + + F(ipa.dest_reg, value); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp new file mode 100644 index 000000000..d2a1dbf61 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp @@ -0,0 +1,218 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
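The IPA lowering above applies its modifiers in a fixed order: perspective-mapped generic attributes are re-multiplied by the position W attribute, the Multiply interpolation mode scales by the multiplier register, and .SAT clamps to [0, 1]. A minimal scalar sketch of that ordering (the helper name and parameters are illustrative, not part of the commit):

#include <algorithm>

// Scalar model of the IPA modifier order used above.
float ModelIpa(float attribute, float position_w, float multiplier,
               bool is_perspective, bool multiply_mode, bool saturate) {
    float value = attribute;
    if (is_perspective) {
        value *= position_w; // perspective inputs are re-scaled by the W attribute
    }
    if (multiply_mode) {
        value *= multiplier; // InterpolationMode::Multiply applies the multiplier register
    }
    if (saturate) {
        value = std::clamp(value, 0.0f, 1.0f); // IPA.SAT, e.g. clamp(varying, 0.0, 1.0)
    }
    return value;
}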
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Size : u64 { + U8, + S8, + U16, + S16, + B32, + B64, + B128, +}; + +IR::U32 Offset(TranslatorVisitor& v, u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> offset_reg; + BitField<20, 24, u64> absolute_offset; + BitField<20, 24, s64> relative_offset; + } const encoding{insn}; + + if (encoding.offset_reg == IR::Reg::RZ) { + return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset)); + } else { + const s32 relative{static_cast<s32>(encoding.relative_offset.Value())}; + return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative)); + } +} + +std::pair<IR::U32, IR::U32> WordOffset(TranslatorVisitor& v, u64 insn) { + const IR::U32 offset{Offset(v, insn)}; + if (offset.IsImmediate()) { + return {v.ir.Imm32(offset.U32() / 4), offset}; + } else { + return {v.ir.ShiftRightArithmetic(offset, v.ir.Imm32(2)), offset}; + } +} + +std::pair<int, bool> GetSize(u64 insn) { + union { + u64 raw; + BitField<48, 3, Size> size; + } const encoding{insn}; + + switch (encoding.size) { + case Size::U8: + return {8, false}; + case Size::S8: + return {8, true}; + case Size::U16: + return {16, false}; + case Size::S16: + return {16, true}; + case Size::B32: + return {32, false}; + case Size::B64: + return {64, false}; + case Size::B128: + return {128, false}; + default: + throw NotImplementedException("Invalid size {}", encoding.size.Value()); + } +} + +IR::Reg Reg(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> reg; + } const encoding{insn}; + + return encoding.reg; +} + +IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) { + return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(24)); +} + +IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) { + return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(16)); +} + +IR::U32 LoadLocal(TranslatorVisitor& v, const IR::U32& word_offset, const IR::U32& offset) { + const IR::U32 local_memory_size{v.ir.Imm32(v.env.LocalMemorySize())}; + const IR::U1 in_bounds{v.ir.ILessThan(offset, local_memory_size, false)}; + return IR::U32{v.ir.Select(in_bounds, v.ir.LoadLocal(word_offset), v.ir.Imm32(0))}; +} +} // Anonymous namespace + +void TranslatorVisitor::LDL(u64 insn) { + const auto [word_offset, offset]{WordOffset(*this, insn)}; + const IR::U32 word{LoadLocal(*this, word_offset, offset)}; + const IR::Reg dest{Reg(insn)}; + const auto [bit_size, is_signed]{GetSize(insn)}; + switch (bit_size) { + case 8: { + const IR::U32 bit{ByteOffset(ir, offset)}; + X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(8), is_signed)); + break; + } + case 16: { + const IR::U32 bit{ShortOffset(ir, offset)}; + X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(16), is_signed)); + break; + } + case 32: + case 64: + case 128: + if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) { + throw NotImplementedException("Unaligned destination register {}", dest); + } + X(dest, word); + for (int i = 1; i < bit_size / 32; ++i) { + const IR::U32 sub_word_offset{ir.IAdd(word_offset, ir.Imm32(i))}; + const IR::U32 sub_offset{ir.IAdd(offset, ir.Imm32(i * 4))}; + X(dest + i, LoadLocal(*this, sub_word_offset, sub_offset)); + } + break; + } +} + +void TranslatorVisitor::LDS(u64 insn) { + const IR::U32 offset{Offset(*this, insn)}; + const IR::Reg dest{Reg(insn)}; + const auto [bit_size, is_signed]{GetSize(insn)}; + const IR::Value 
value{ir.LoadShared(bit_size, is_signed, offset)}; + switch (bit_size) { + case 8: + case 16: + case 32: + X(dest, IR::U32{value}); + break; + case 64: + case 128: + if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) { + throw NotImplementedException("Unaligned destination register {}", dest); + } + for (int element = 0; element < bit_size / 32; ++element) { + X(dest + element, IR::U32{ir.CompositeExtract(value, static_cast<size_t>(element))}); + } + break; + } +} + +void TranslatorVisitor::STL(u64 insn) { + const auto [word_offset, offset]{WordOffset(*this, insn)}; + if (offset.IsImmediate()) { + // TODO: Support storing out of bounds at runtime + if (offset.U32() >= env.LocalMemorySize()) { + LOG_WARNING(Shader, "Storing local memory at 0x{:x} with a size of 0x{:x}, dropping", + offset.U32(), env.LocalMemorySize()); + return; + } + } + const IR::Reg reg{Reg(insn)}; + const IR::U32 src{X(reg)}; + const int bit_size{GetSize(insn).first}; + switch (bit_size) { + case 8: { + const IR::U32 bit{ByteOffset(ir, offset)}; + const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(8))}; + ir.WriteLocal(word_offset, value); + break; + } + case 16: { + const IR::U32 bit{ShortOffset(ir, offset)}; + const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(16))}; + ir.WriteLocal(word_offset, value); + break; + } + case 32: + case 64: + case 128: + if (!IR::IsAligned(reg, static_cast<size_t>(bit_size / 32))) { + throw NotImplementedException("Unaligned source register"); + } + ir.WriteLocal(word_offset, src); + for (int i = 1; i < bit_size / 32; ++i) { + ir.WriteLocal(ir.IAdd(word_offset, ir.Imm32(i)), X(reg + i)); + } + break; + } +} + +void TranslatorVisitor::STS(u64 insn) { + const IR::U32 offset{Offset(*this, insn)}; + const IR::Reg reg{Reg(insn)}; + const int bit_size{GetSize(insn).first}; + switch (bit_size) { + case 8: + case 16: + case 32: + ir.WriteShared(bit_size, offset, X(reg)); + break; + case 64: + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + ir.WriteShared(64, offset, ir.CompositeConstruct(X(reg), X(reg + 1))); + break; + case 128: { + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + const IR::Value vector{ir.CompositeConstruct(X(reg), X(reg + 1), X(reg + 2), X(reg + 3))}; + ir.WriteShared(128, offset, vector); + break; + } + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp new file mode 100644 index 000000000..36c5cff2f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp @@ -0,0 +1,184 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class LoadSize : u64 { + U8, // Zero-extend + S8, // Sign-extend + U16, // Zero-extend + S16, // Sign-extend + B32, + B64, + B128, + U128, // ??? 
+}; + +enum class StoreSize : u64 { + U8, // Zero-extend + S8, // Sign-extend + U16, // Zero-extend + S16, // Sign-extend + B32, + B64, + B128, +}; + +// See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html +enum class LoadCache : u64 { + CA, // Cache at all levels, likely to be accessed again + CG, // Cache at global level (cache in L2 and below, not L1) + CI, // ??? + CV, // Don't cache and fetch again (consider cached system memory lines stale, fetch again) +}; + +// See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html +enum class StoreCache : u64 { + WB, // Cache write-back all coherent levels + CG, // Cache at global level + CS, // Cache streaming, likely to be accessed once + WT, // Cache write-through (to system memory) +}; + +IR::U64 Address(TranslatorVisitor& v, u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> addr_reg; + BitField<20, 24, s64> addr_offset; + BitField<20, 24, u64> rz_addr_offset; + BitField<45, 1, u64> e; + } const mem{insn}; + + const IR::U64 address{[&]() -> IR::U64 { + if (mem.e == 0) { + // LDG/STG without .E uses a 32-bit pointer, zero-extend it + return v.ir.UConvert(64, v.X(mem.addr_reg)); + } + if (!IR::IsAligned(mem.addr_reg, 2)) { + throw NotImplementedException("Unaligned address register"); + } + // Pack two registers to build the 64-bit address + return v.ir.PackUint2x32(v.ir.CompositeConstruct(v.X(mem.addr_reg), v.X(mem.addr_reg + 1))); + }()}; + const u64 addr_offset{[&]() -> u64 { + if (mem.addr_reg == IR::Reg::RZ) { + // When RZ is used, the address is an absolute address + return static_cast<u64>(mem.rz_addr_offset.Value()); + } else { + return static_cast<u64>(mem.addr_offset.Value()); + } + }()}; + // Apply the offset + return v.ir.IAdd(address, v.ir.Imm64(addr_offset)); +} +} // Anonymous namespace + +void TranslatorVisitor::LDG(u64 insn) { + // LDG loads global memory into registers + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<46, 2, LoadCache> cache; + BitField<48, 3, LoadSize> size; + } const ldg{insn}; + + // Pointer to load data from + const IR::U64 address{Address(*this, insn)}; + const IR::Reg dest_reg{ldg.dest_reg}; + switch (ldg.size) { + case LoadSize::U8: + X(dest_reg, ir.LoadGlobalU8(address)); + break; + case LoadSize::S8: + X(dest_reg, ir.LoadGlobalS8(address)); + break; + case LoadSize::U16: + X(dest_reg, ir.LoadGlobalU16(address)); + break; + case LoadSize::S16: + X(dest_reg, ir.LoadGlobalS16(address)); + break; + case LoadSize::B32: + X(dest_reg, ir.LoadGlobal32(address)); + break; + case LoadSize::B64: { + if (!IR::IsAligned(dest_reg, 2)) { + throw NotImplementedException("Unaligned data registers"); + } + const IR::Value vector{ir.LoadGlobal64(address)}; + for (int i = 0; i < 2; ++i) { + X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))}); + } + break; + } + case LoadSize::B128: + case LoadSize::U128: { + if (!IR::IsAligned(dest_reg, 4)) { + throw NotImplementedException("Unaligned data registers"); + } + const IR::Value vector{ir.LoadGlobal128(address)}; + for (int i = 0; i < 4; ++i) { + X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))}); + } + break; + } + default: + throw NotImplementedException("Invalid LDG size {}", ldg.size.Value()); + } +} + +void TranslatorVisitor::STG(u64 insn) { + // STG stores registers into global memory. 
+ union { + u64 raw; + BitField<0, 8, IR::Reg> data_reg; + BitField<46, 2, StoreCache> cache; + BitField<48, 3, StoreSize> size; + } const stg{insn}; + + // Pointer to store data into + const IR::U64 address{Address(*this, insn)}; + const IR::Reg data_reg{stg.data_reg}; + switch (stg.size) { + case StoreSize::U8: + ir.WriteGlobalU8(address, X(data_reg)); + break; + case StoreSize::S8: + ir.WriteGlobalS8(address, X(data_reg)); + break; + case StoreSize::U16: + ir.WriteGlobalU16(address, X(data_reg)); + break; + case StoreSize::S16: + ir.WriteGlobalS16(address, X(data_reg)); + break; + case StoreSize::B32: + ir.WriteGlobal32(address, X(data_reg)); + break; + case StoreSize::B64: { + if (!IR::IsAligned(data_reg, 2)) { + throw NotImplementedException("Unaligned data registers"); + } + const IR::Value vector{ir.CompositeConstruct(X(data_reg), X(data_reg + 1))}; + ir.WriteGlobal64(address, vector); + break; + } + case StoreSize::B128: + if (!IR::IsAligned(data_reg, 4)) { + throw NotImplementedException("Unaligned data registers"); + } + const IR::Value vector{ + ir.CompositeConstruct(X(data_reg), X(data_reg + 1), X(data_reg + 2), X(data_reg + 3))}; + ir.WriteGlobal128(address, vector); + break; + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp new file mode 100644 index 000000000..92cd27ed4 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp @@ -0,0 +1,116 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class LogicalOp : u64 { + AND, + OR, + XOR, + PASS_B, +}; + +[[nodiscard]] IR::U32 LogicalOperation(IR::IREmitter& ir, const IR::U32& operand_1, + const IR::U32& operand_2, LogicalOp op) { + switch (op) { + case LogicalOp::AND: + return ir.BitwiseAnd(operand_1, operand_2); + case LogicalOp::OR: + return ir.BitwiseOr(operand_1, operand_2); + case LogicalOp::XOR: + return ir.BitwiseXor(operand_1, operand_2); + case LogicalOp::PASS_B: + return operand_2; + default: + throw NotImplementedException("Invalid Logical operation {}", op); + } +} + +void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool x, bool cc, bool inv_a, bool inv_b, + LogicalOp bit_op, std::optional<PredicateOp> pred_op = std::nullopt, + IR::Pred dest_pred = IR::Pred::PT) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + } const lop{insn}; + + if (x) { + throw NotImplementedException("X"); + } + IR::U32 op_a{v.X(lop.src_reg)}; + if (inv_a != 0) { + op_a = v.ir.BitwiseNot(op_a); + } + if (inv_b != 0) { + op_b = v.ir.BitwiseNot(op_b); + } + + const IR::U32 result{LogicalOperation(v.ir, op_a, op_b, bit_op)}; + if (pred_op) { + const IR::U1 pred_result{PredicateOperation(v.ir, result, *pred_op)}; + v.ir.SetPred(dest_pred, pred_result); + } + if (cc) { + if (bit_op == LogicalOp::PASS_B) { + v.SetZFlag(v.ir.IEqual(result, v.ir.Imm32(0))); + v.SetSFlag(v.ir.ILessThan(result, v.ir.Imm32(0), true)); + } else { + v.SetZFlag(v.ir.GetZeroFromOp(result)); + v.SetSFlag(v.ir.GetSignFromOp(result)); + } + v.ResetCFlag(); + v.ResetOFlag(); + } + v.X(lop.dest_reg, 
result); +} + +void LOP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { + union { + u64 insn; + BitField<39, 1, u64> inv_a; + BitField<40, 1, u64> inv_b; + BitField<41, 2, LogicalOp> bit_op; + BitField<43, 1, u64> x; + BitField<44, 2, PredicateOp> pred_op; + BitField<47, 1, u64> cc; + BitField<48, 3, IR::Pred> dest_pred; + } const lop{insn}; + + LOP(v, insn, op_b, lop.x != 0, lop.cc != 0, lop.inv_a != 0, lop.inv_b != 0, lop.bit_op, + lop.pred_op, lop.dest_pred); +} +} // Anonymous namespace + +void TranslatorVisitor::LOP_reg(u64 insn) { + LOP(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::LOP_cbuf(u64 insn) { + LOP(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::LOP_imm(u64 insn) { + LOP(*this, insn, GetImm20(insn)); +} + +void TranslatorVisitor::LOP32I(u64 insn) { + union { + u64 raw; + BitField<53, 2, LogicalOp> bit_op; + BitField<57, 1, u64> x; + BitField<52, 1, u64> cc; + BitField<55, 1, u64> inv_a; + BitField<56, 1, u64> inv_b; + } const lop32i{insn}; + + LOP(*this, insn, GetImm32(insn), lop32i.x != 0, lop32i.cc != 0, lop32i.inv_a != 0, + lop32i.inv_b != 0, lop32i.bit_op); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp new file mode 100644 index 000000000..e0fe47912 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp @@ -0,0 +1,122 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +// https://forums.developer.nvidia.com/t/reverse-lut-for-lop3-lut/110651 +// Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table) +IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c, + u64 ttbl) { + IR::U32 r{ir.Imm32(0)}; + const IR::U32 not_a{ir.BitwiseNot(a)}; + const IR::U32 not_b{ir.BitwiseNot(b)}; + const IR::U32 not_c{ir.BitwiseNot(c)}; + if (ttbl & 0x01) { + // r |= ~a & ~b & ~c; + const auto lhs{ir.BitwiseAnd(not_a, not_b)}; + const auto rhs{ir.BitwiseAnd(lhs, not_c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x02) { + // r |= ~a & ~b & c; + const auto lhs{ir.BitwiseAnd(not_a, not_b)}; + const auto rhs{ir.BitwiseAnd(lhs, c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x04) { + // r |= ~a & b & ~c; + const auto lhs{ir.BitwiseAnd(not_a, b)}; + const auto rhs{ir.BitwiseAnd(lhs, not_c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x08) { + // r |= ~a & b & c; + const auto lhs{ir.BitwiseAnd(not_a, b)}; + const auto rhs{ir.BitwiseAnd(lhs, c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x10) { + // r |= a & ~b & ~c; + const auto lhs{ir.BitwiseAnd(a, not_b)}; + const auto rhs{ir.BitwiseAnd(lhs, not_c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x20) { + // r |= a & ~b & c; + const auto lhs{ir.BitwiseAnd(a, not_b)}; + const auto rhs{ir.BitwiseAnd(lhs, c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x40) { + // r |= a & b & ~c; + const auto lhs{ir.BitwiseAnd(a, b)}; + const auto rhs{ir.BitwiseAnd(lhs, not_c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x80) { + // r |= a & b & c; + const auto lhs{ir.BitwiseAnd(a, b)}; + const auto 
rhs{ir.BitwiseAnd(lhs, c)}; + r = ir.BitwiseOr(r, rhs); + } + return r; +} + +IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<47, 1, u64> cc; + } const lop3{insn}; + + if (lop3.cc != 0) { + throw NotImplementedException("LOP3 CC"); + } + + const IR::U32 op_a{v.X(lop3.src_reg)}; + const IR::U32 result{ApplyLUT(v.ir, op_a, op_b, op_c, lut)}; + v.X(lop3.dest_reg, result); + return result; +} + +u64 GetLut48(u64 insn) { + union { + u64 raw; + BitField<48, 8, u64> lut; + } const lut{insn}; + return lut.lut; +} +} // Anonymous namespace + +void TranslatorVisitor::LOP3_reg(u64 insn) { + union { + u64 insn; + BitField<28, 8, u64> lut; + BitField<38, 1, u64> x; + BitField<36, 2, PredicateOp> pred_op; + BitField<48, 3, IR::Pred> pred; + } const lop3{insn}; + + if (lop3.x != 0) { + throw NotImplementedException("LOP3 X"); + } + const IR::U32 result{LOP3(*this, insn, GetReg20(insn), GetReg39(insn), lop3.lut)}; + const IR::U1 pred_result{PredicateOperation(ir, result, lop3.pred_op)}; + ir.SetPred(lop3.pred, pred_result); +} + +void TranslatorVisitor::LOP3_cbuf(u64 insn) { + LOP3(*this, insn, GetCbuf(insn), GetReg39(insn), GetLut48(insn)); +} + +void TranslatorVisitor::LOP3_imm(u64 insn) { + LOP3(*this, insn, GetImm20(insn), GetReg39(insn), GetLut48(insn)); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp new file mode 100644 index 000000000..4324fd443 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp @@ -0,0 +1,66 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + PR, + CC, +}; +} // Anonymous namespace + +void TranslatorVisitor::P2R_reg(u64) { + throw NotImplementedException("P2R (reg)"); +} + +void TranslatorVisitor::P2R_cbuf(u64) { + throw NotImplementedException("P2R (cbuf)"); +} + +void TranslatorVisitor::P2R_imm(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src; + BitField<40, 1, Mode> mode; + BitField<41, 2, u64> byte_selector; + } const p2r{insn}; + + const u32 mask{GetImm20(insn).U32()}; + const bool pr_mode{p2r.mode == Mode::PR}; + const u32 num_items{pr_mode ? 
7U : 4U}; + const u32 offset{static_cast<u32>(p2r.byte_selector) * 8}; + IR::U32 insert{ir.Imm32(0)}; + for (u32 index = 0; index < num_items; ++index) { + if (((mask >> index) & 1) == 0) { + continue; + } + const IR::U1 cond{[this, index, pr_mode] { + if (pr_mode) { + return ir.GetPred(IR::Pred{index}); + } + switch (index) { + case 0: + return ir.GetZFlag(); + case 1: + return ir.GetSFlag(); + case 2: + return ir.GetCFlag(); + case 3: + return ir.GetOFlag(); + } + throw LogicError("Unreachable P2R index"); + }()}; + const IR::U32 bit{ir.Select(cond, ir.Imm32(1U << (index + offset)), ir.Imm32(0))}; + insert = ir.BitwiseOr(insert, bit); + } + const IR::U32 masked_out{ir.BitwiseAnd(X(p2r.src), ir.Imm32(~(mask << offset)))}; + X(p2r.dest_reg, ir.BitwiseOr(masked_out, insert)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp new file mode 100644 index 000000000..6bb08db8a --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp @@ -0,0 +1,44 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = false) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<39, 4, u64> mask; + BitField<12, 4, u64> mov32i_mask; + } const mov{insn}; + + if ((is_mov32i ? mov.mov32i_mask : mov.mask) != 0xf) { + throw NotImplementedException("Non-full move mask"); + } + v.X(mov.dest_reg, src); +} +} // Anonymous namespace + +void TranslatorVisitor::MOV_reg(u64 insn) { + MOV(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::MOV_cbuf(u64 insn) { + MOV(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::MOV_imm(u64 insn) { + MOV(*this, insn, GetImm20(insn)); +} + +void TranslatorVisitor::MOV32I(u64 insn) { + MOV(*this, insn, GetImm32(insn), true); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp new file mode 100644 index 000000000..eda5f177b --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp @@ -0,0 +1,71 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
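P2R_imm above packs predicate (or condition-code) bits into one byte of the destination while preserving the bits outside the immediate mask. A host-side sketch of that packing, with an illustrative helper name and the predicates passed in as plain booleans:

#include <cstdint>

// Scalar model of P2R in PR mode: mask selects which predicate bits are written,
// byte_selector picks the destination byte, all other bits keep the source value.
std::uint32_t PackPredicates(std::uint32_t src, std::uint32_t mask, unsigned byte_selector,
                             const bool (&preds)[7]) {
    const unsigned offset = byte_selector * 8;
    std::uint32_t insert = 0;
    for (unsigned index = 0; index < 7; ++index) {
        if (((mask >> index) & 1) != 0 && preds[index]) {
            insert |= 1u << (index + offset);
        }
    }
    return (src & ~(mask << offset)) | insert;
}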
+ +#include "common/bit_field.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + PR, + CC, +}; + +void SetFlag(IR::IREmitter& ir, const IR::U1& inv_mask_bit, const IR::U1& src_bit, u32 index) { + switch (index) { + case 0: + return ir.SetZFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetZFlag(), src_bit)}); + case 1: + return ir.SetSFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetSFlag(), src_bit)}); + case 2: + return ir.SetCFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetCFlag(), src_bit)}); + case 3: + return ir.SetOFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetOFlag(), src_bit)}); + default: + throw LogicError("Unreachable R2P index"); + } +} + +void R2P(TranslatorVisitor& v, u64 insn, const IR::U32& mask) { + union { + u64 raw; + BitField<8, 8, IR::Reg> src_reg; + BitField<40, 1, Mode> mode; + BitField<41, 2, u64> byte_selector; + } const r2p{insn}; + const IR::U32 src{v.X(r2p.src_reg)}; + const IR::U32 count{v.ir.Imm32(1)}; + const bool pr_mode{r2p.mode == Mode::PR}; + const u32 num_items{pr_mode ? 7U : 4U}; + const u32 offset_base{static_cast<u32>(r2p.byte_selector) * 8}; + for (u32 index = 0; index < num_items; ++index) { + const IR::U32 offset{v.ir.Imm32(offset_base + index)}; + const IR::U1 src_zero{v.ir.GetZeroFromOp(v.ir.BitFieldExtract(src, offset, count, false))}; + const IR::U1 src_bit{v.ir.LogicalNot(src_zero)}; + const IR::U32 mask_bfe{v.ir.BitFieldExtract(mask, v.ir.Imm32(index), count, false)}; + const IR::U1 inv_mask_bit{v.ir.GetZeroFromOp(mask_bfe)}; + if (pr_mode) { + const IR::Pred pred{index}; + v.ir.SetPred(pred, IR::U1{v.ir.Select(inv_mask_bit, v.ir.GetPred(pred), src_bit)}); + } else { + SetFlag(v.ir, inv_mask_bit, src_bit, index); + } + } +} +} // Anonymous namespace + +void TranslatorVisitor::R2P_reg(u64 insn) { + R2P(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::R2P_cbuf(u64 insn) { + R2P(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::R2P_imm(u64 insn) { + R2P(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp new file mode 100644 index 000000000..20cb2674e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -0,0 +1,181 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class SpecialRegister : u64 { + SR_LANEID = 0, + SR_CLOCK = 1, + SR_VIRTCFG = 2, + SR_VIRTID = 3, + SR_PM0 = 4, + SR_PM1 = 5, + SR_PM2 = 6, + SR_PM3 = 7, + SR_PM4 = 8, + SR_PM5 = 9, + SR_PM6 = 10, + SR_PM7 = 11, + SR12 = 12, + SR13 = 13, + SR14 = 14, + SR_ORDERING_TICKET = 15, + SR_PRIM_TYPE = 16, + SR_INVOCATION_ID = 17, + SR_Y_DIRECTION = 18, + SR_THREAD_KILL = 19, + SM_SHADER_TYPE = 20, + SR_DIRECTCBEWRITEADDRESSLOW = 21, + SR_DIRECTCBEWRITEADDRESSHIGH = 22, + SR_DIRECTCBEWRITEENABLE = 23, + SR_MACHINE_ID_0 = 24, + SR_MACHINE_ID_1 = 25, + SR_MACHINE_ID_2 = 26, + SR_MACHINE_ID_3 = 27, + SR_AFFINITY = 28, + SR_INVOCATION_INFO = 29, + SR_WSCALEFACTOR_XY = 30, + SR_WSCALEFACTOR_Z = 31, + SR_TID = 32, + SR_TID_X = 33, + SR_TID_Y = 34, + SR_TID_Z = 35, + SR_CTA_PARAM = 36, + SR_CTAID_X = 37, + SR_CTAID_Y = 38, + SR_CTAID_Z = 39, + SR_NTID = 40, + SR_CirQueueIncrMinusOne = 41, + SR_NLATC = 42, + SR43 = 43, + SR_SM_SPA_VERSION = 44, + SR_MULTIPASSSHADERINFO = 45, + SR_LWINHI = 46, + SR_SWINHI = 47, + SR_SWINLO = 48, + SR_SWINSZ = 49, + SR_SMEMSZ = 50, + SR_SMEMBANKS = 51, + SR_LWINLO = 52, + SR_LWINSZ = 53, + SR_LMEMLOSZ = 54, + SR_LMEMHIOFF = 55, + SR_EQMASK = 56, + SR_LTMASK = 57, + SR_LEMASK = 58, + SR_GTMASK = 59, + SR_GEMASK = 60, + SR_REGALLOC = 61, + SR_BARRIERALLOC = 62, + SR63 = 63, + SR_GLOBALERRORSTATUS = 64, + SR65 = 65, + SR_WARPERRORSTATUS = 66, + SR_WARPERRORSTATUSCLEAR = 67, + SR68 = 68, + SR69 = 69, + SR70 = 70, + SR71 = 71, + SR_PM_HI0 = 72, + SR_PM_HI1 = 73, + SR_PM_HI2 = 74, + SR_PM_HI3 = 75, + SR_PM_HI4 = 76, + SR_PM_HI5 = 77, + SR_PM_HI6 = 78, + SR_PM_HI7 = 79, + SR_CLOCKLO = 80, + SR_CLOCKHI = 81, + SR_GLOBALTIMERLO = 82, + SR_GLOBALTIMERHI = 83, + SR84 = 84, + SR85 = 85, + SR86 = 86, + SR87 = 87, + SR88 = 88, + SR89 = 89, + SR90 = 90, + SR91 = 91, + SR92 = 92, + SR93 = 93, + SR94 = 94, + SR95 = 95, + SR_HWTASKID = 96, + SR_CIRCULARQUEUEENTRYINDEX = 97, + SR_CIRCULARQUEUEENTRYADDRESSLOW = 98, + SR_CIRCULARQUEUEENTRYADDRESSHIGH = 99, +}; + +[[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) { + switch (special_register) { + case SpecialRegister::SR_INVOCATION_ID: + return ir.InvocationId(); + case SpecialRegister::SR_THREAD_KILL: + return IR::U32{ir.Select(ir.IsHelperInvocation(), ir.Imm32(-1), ir.Imm32(0))}; + case SpecialRegister::SR_INVOCATION_INFO: + LOG_WARNING(Shader, "(STUBBED) SR_INVOCATION_INFO"); + return ir.Imm32(0x00ff'0000); + case SpecialRegister::SR_TID: { + const IR::Value tid{ir.LocalInvocationId()}; + return ir.BitFieldInsert(ir.BitFieldInsert(IR::U32{ir.CompositeExtract(tid, 0)}, + IR::U32{ir.CompositeExtract(tid, 1)}, + ir.Imm32(16), ir.Imm32(8)), + IR::U32{ir.CompositeExtract(tid, 2)}, ir.Imm32(26), ir.Imm32(6)); + } + case SpecialRegister::SR_TID_X: + return ir.LocalInvocationIdX(); + case SpecialRegister::SR_TID_Y: + return ir.LocalInvocationIdY(); + case SpecialRegister::SR_TID_Z: + return ir.LocalInvocationIdZ(); + case SpecialRegister::SR_CTAID_X: + return ir.WorkgroupIdX(); + case SpecialRegister::SR_CTAID_Y: + return ir.WorkgroupIdY(); + case SpecialRegister::SR_CTAID_Z: + return ir.WorkgroupIdZ(); + case SpecialRegister::SR_WSCALEFACTOR_XY: + LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_XY"); + return ir.Imm32(Common::BitCast<u32>(1.0f)); + case SpecialRegister::SR_WSCALEFACTOR_Z: + LOG_WARNING(Shader, "(STUBBED) 
SR_WSCALEFACTOR_Z"); + return ir.Imm32(Common::BitCast<u32>(1.0f)); + case SpecialRegister::SR_LANEID: + return ir.LaneId(); + case SpecialRegister::SR_EQMASK: + return ir.SubgroupEqMask(); + case SpecialRegister::SR_LTMASK: + return ir.SubgroupLtMask(); + case SpecialRegister::SR_LEMASK: + return ir.SubgroupLeMask(); + case SpecialRegister::SR_GTMASK: + return ir.SubgroupGtMask(); + case SpecialRegister::SR_GEMASK: + return ir.SubgroupGeMask(); + case SpecialRegister::SR_Y_DIRECTION: + return ir.BitCast<IR::U32>(ir.YDirection()); + case SpecialRegister::SR_AFFINITY: + LOG_WARNING(Shader, "(STUBBED) SR_AFFINITY"); + return ir.Imm32(0); // This is the default value hardware returns. + default: + throw NotImplementedException("S2R special register {}", special_register); + } +} +} // Anonymous namespace + +void TranslatorVisitor::S2R(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<20, 8, SpecialRegister> src_reg; + } const s2r{insn}; + + X(s2r.dest_reg, Read(ir, s2r.src_reg)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp new file mode 100644 index 000000000..7e26ab359 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -0,0 +1,283 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { + +[[noreturn]] static void ThrowNotImplemented(Opcode opcode) { + throw NotImplementedException("Instruction {} is not implemented", opcode); +} + +void TranslatorVisitor::ATOM_cas(u64) { + ThrowNotImplemented(Opcode::ATOM_cas); +} + +void TranslatorVisitor::ATOMS_cas(u64) { + ThrowNotImplemented(Opcode::ATOMS_cas); +} + +void TranslatorVisitor::B2R(u64) { + ThrowNotImplemented(Opcode::B2R); +} + +void TranslatorVisitor::BPT(u64) { + ThrowNotImplemented(Opcode::BPT); +} + +void TranslatorVisitor::BRA(u64) { + ThrowNotImplemented(Opcode::BRA); +} + +void TranslatorVisitor::BRK(u64) { + ThrowNotImplemented(Opcode::BRK); +} + +void TranslatorVisitor::CAL() { + // CAL is a no-op +} + +void TranslatorVisitor::CCTL(u64) { + ThrowNotImplemented(Opcode::CCTL); +} + +void TranslatorVisitor::CCTLL(u64) { + ThrowNotImplemented(Opcode::CCTLL); +} + +void TranslatorVisitor::CONT(u64) { + ThrowNotImplemented(Opcode::CONT); +} + +void TranslatorVisitor::CS2R(u64) { + ThrowNotImplemented(Opcode::CS2R); +} + +void TranslatorVisitor::FCHK_reg(u64) { + ThrowNotImplemented(Opcode::FCHK_reg); +} + +void TranslatorVisitor::FCHK_cbuf(u64) { + ThrowNotImplemented(Opcode::FCHK_cbuf); +} + +void TranslatorVisitor::FCHK_imm(u64) { + ThrowNotImplemented(Opcode::FCHK_imm); +} + +void TranslatorVisitor::GETCRSPTR(u64) { + ThrowNotImplemented(Opcode::GETCRSPTR); +} + +void TranslatorVisitor::GETLMEMBASE(u64) { + ThrowNotImplemented(Opcode::GETLMEMBASE); +} + +void TranslatorVisitor::IDE(u64) { + ThrowNotImplemented(Opcode::IDE); +} + +void TranslatorVisitor::IDP_reg(u64) { + ThrowNotImplemented(Opcode::IDP_reg); +} + +void TranslatorVisitor::IDP_imm(u64) { + ThrowNotImplemented(Opcode::IDP_imm); +} + +void TranslatorVisitor::IMAD_reg(u64) { + ThrowNotImplemented(Opcode::IMAD_reg); +} + +void 
TranslatorVisitor::IMAD_rc(u64) { + ThrowNotImplemented(Opcode::IMAD_rc); +} + +void TranslatorVisitor::IMAD_cr(u64) { + ThrowNotImplemented(Opcode::IMAD_cr); +} + +void TranslatorVisitor::IMAD_imm(u64) { + ThrowNotImplemented(Opcode::IMAD_imm); +} + +void TranslatorVisitor::IMAD32I(u64) { + ThrowNotImplemented(Opcode::IMAD32I); +} + +void TranslatorVisitor::IMADSP_reg(u64) { + ThrowNotImplemented(Opcode::IMADSP_reg); +} + +void TranslatorVisitor::IMADSP_rc(u64) { + ThrowNotImplemented(Opcode::IMADSP_rc); +} + +void TranslatorVisitor::IMADSP_cr(u64) { + ThrowNotImplemented(Opcode::IMADSP_cr); +} + +void TranslatorVisitor::IMADSP_imm(u64) { + ThrowNotImplemented(Opcode::IMADSP_imm); +} + +void TranslatorVisitor::IMUL_reg(u64) { + ThrowNotImplemented(Opcode::IMUL_reg); +} + +void TranslatorVisitor::IMUL_cbuf(u64) { + ThrowNotImplemented(Opcode::IMUL_cbuf); +} + +void TranslatorVisitor::IMUL_imm(u64) { + ThrowNotImplemented(Opcode::IMUL_imm); +} + +void TranslatorVisitor::IMUL32I(u64) { + ThrowNotImplemented(Opcode::IMUL32I); +} + +void TranslatorVisitor::JCAL(u64) { + ThrowNotImplemented(Opcode::JCAL); +} + +void TranslatorVisitor::JMP(u64) { + ThrowNotImplemented(Opcode::JMP); +} + +void TranslatorVisitor::KIL() { + // KIL is a no-op +} + +void TranslatorVisitor::LD(u64) { + ThrowNotImplemented(Opcode::LD); +} + +void TranslatorVisitor::LEPC(u64) { + ThrowNotImplemented(Opcode::LEPC); +} + +void TranslatorVisitor::LONGJMP(u64) { + ThrowNotImplemented(Opcode::LONGJMP); +} + +void TranslatorVisitor::NOP(u64) { + // NOP is No-Op. +} + +void TranslatorVisitor::PBK() { + // PBK is a no-op +} + +void TranslatorVisitor::PCNT() { + // PCNT is a no-op +} + +void TranslatorVisitor::PEXIT(u64) { + ThrowNotImplemented(Opcode::PEXIT); +} + +void TranslatorVisitor::PLONGJMP(u64) { + ThrowNotImplemented(Opcode::PLONGJMP); +} + +void TranslatorVisitor::PRET(u64) { + ThrowNotImplemented(Opcode::PRET); +} + +void TranslatorVisitor::PRMT_reg(u64) { + ThrowNotImplemented(Opcode::PRMT_reg); +} + +void TranslatorVisitor::PRMT_rc(u64) { + ThrowNotImplemented(Opcode::PRMT_rc); +} + +void TranslatorVisitor::PRMT_cr(u64) { + ThrowNotImplemented(Opcode::PRMT_cr); +} + +void TranslatorVisitor::PRMT_imm(u64) { + ThrowNotImplemented(Opcode::PRMT_imm); +} + +void TranslatorVisitor::R2B(u64) { + ThrowNotImplemented(Opcode::R2B); +} + +void TranslatorVisitor::RAM(u64) { + ThrowNotImplemented(Opcode::RAM); +} + +void TranslatorVisitor::RET(u64) { + ThrowNotImplemented(Opcode::RET); +} + +void TranslatorVisitor::RTT(u64) { + ThrowNotImplemented(Opcode::RTT); +} + +void TranslatorVisitor::SAM(u64) { + ThrowNotImplemented(Opcode::SAM); +} + +void TranslatorVisitor::SETCRSPTR(u64) { + ThrowNotImplemented(Opcode::SETCRSPTR); +} + +void TranslatorVisitor::SETLMEMBASE(u64) { + ThrowNotImplemented(Opcode::SETLMEMBASE); +} + +void TranslatorVisitor::SSY() { + // SSY is a no-op +} + +void TranslatorVisitor::ST(u64) { + ThrowNotImplemented(Opcode::ST); +} + +void TranslatorVisitor::STP(u64) { + ThrowNotImplemented(Opcode::STP); +} + +void TranslatorVisitor::SUATOM_cas(u64) { + ThrowNotImplemented(Opcode::SUATOM_cas); +} + +void TranslatorVisitor::SYNC(u64) { + ThrowNotImplemented(Opcode::SYNC); +} + +void TranslatorVisitor::TXA(u64) { + ThrowNotImplemented(Opcode::TXA); +} + +void TranslatorVisitor::VABSDIFF(u64) { + ThrowNotImplemented(Opcode::VABSDIFF); +} + +void TranslatorVisitor::VABSDIFF4(u64) { + ThrowNotImplemented(Opcode::VABSDIFF4); +} + +void TranslatorVisitor::VADD(u64) { + ThrowNotImplemented(Opcode::VADD); +} + +void 
TranslatorVisitor::VSET(u64) { + ThrowNotImplemented(Opcode::VSET); +} +void TranslatorVisitor::VSHL(u64) { + ThrowNotImplemented(Opcode::VSHL); +} + +void TranslatorVisitor::VSHR(u64) { + ThrowNotImplemented(Opcode::VSHR); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp new file mode 100644 index 000000000..01cfad88d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp @@ -0,0 +1,45 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void OUT(TranslatorVisitor& v, u64 insn, IR::U32 stream_index) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> output_reg; // Not needed on host + BitField<39, 1, u64> emit; + BitField<40, 1, u64> cut; + } const out{insn}; + + stream_index = v.ir.BitwiseAnd(stream_index, v.ir.Imm32(0b11)); + + if (out.emit != 0) { + v.ir.EmitVertex(stream_index); + } + if (out.cut != 0) { + v.ir.EndPrimitive(stream_index); + } + // Host doesn't need the output register, but we can write to it to avoid undefined reads + v.X(out.dest_reg, v.ir.Imm32(0)); +} +} // Anonymous namespace + +void TranslatorVisitor::OUT_reg(u64 insn) { + OUT(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::OUT_cbuf(u64 insn) { + OUT(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::OUT_imm(u64 insn) { + OUT(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp new file mode 100644 index 000000000..b4767afb5 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp @@ -0,0 +1,46 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
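The OUT translation above masks the stream index to the four geometry streams, emits a vertex and/or ends the primitive on that stream, and zeroes the destination register so later reads are defined. A scalar sketch of that control flow (the callbacks stand in for ir.EmitVertex and ir.EndPrimitive and are illustrative):

#include <functional>

// Scalar model of the OUT lowering above.
void ModelOut(unsigned stream_index, bool emit, bool cut,
              const std::function<void(unsigned)>& emit_vertex,
              const std::function<void(unsigned)>& end_primitive,
              unsigned& dest_reg) {
    stream_index &= 0b11; // only streams 0-3 exist
    if (emit) {
        emit_vertex(stream_index);
    }
    if (cut) {
        end_primitive(stream_index);
    }
    dest_reg = 0; // the host does not need the output register; write 0 to avoid undefined reads
}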
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + Default, + CovMask, + Covered, + Offset, + CentroidOffset, + MyIndex, +}; +} // Anonymous namespace + +void TranslatorVisitor::PIXLD(u64 insn) { + union { + u64 raw; + BitField<31, 3, Mode> mode; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> addr_reg; + BitField<20, 8, s64> addr_offset; + BitField<45, 3, IR::Pred> dest_pred; + } const pixld{insn}; + + if (pixld.dest_pred != IR::Pred::PT) { + throw NotImplementedException("Destination predicate"); + } + if (pixld.addr_reg != IR::Reg::RZ || pixld.addr_offset != 0) { + throw NotImplementedException("Non-zero source register"); + } + switch (pixld.mode) { + case Mode::MyIndex: + X(pixld.dest_reg, ir.SampleId()); + break; + default: + throw NotImplementedException("Mode {}", pixld.mode.Value()); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp new file mode 100644 index 000000000..75d1fa8c1 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp @@ -0,0 +1,38 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +void TranslatorVisitor::PSETP(u64 insn) { + union { + u64 raw; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<12, 3, IR::Pred> pred_a; + BitField<15, 1, u64> neg_pred_a; + BitField<24, 2, BooleanOp> bop_1; + BitField<29, 3, IR::Pred> pred_b; + BitField<32, 1, u64> neg_pred_b; + BitField<39, 3, IR::Pred> pred_c; + BitField<42, 1, u64> neg_pred_c; + BitField<45, 2, BooleanOp> bop_2; + } const pset{insn}; + + const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)}; + const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)}; + const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)}; + + const IR::U1 lhs_a{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)}; + const IR::U1 lhs_b{PredicateCombine(ir, ir.LogicalNot(pred_a), pred_b, pset.bop_1)}; + const IR::U1 result_a{PredicateCombine(ir, lhs_a, pred_c, pset.bop_2)}; + const IR::U1 result_b{PredicateCombine(ir, lhs_b, pred_c, pset.bop_2)}; + + ir.SetPred(pset.dest_pred_a, result_a); + ir.SetPred(pset.dest_pred_b, result_b); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp new file mode 100644 index 000000000..b02789874 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp @@ -0,0 +1,53 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +void TranslatorVisitor::PSET(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<12, 3, IR::Pred> pred_a; + BitField<15, 1, u64> neg_pred_a; + BitField<24, 2, BooleanOp> bop_1; + BitField<29, 3, IR::Pred> pred_b; + BitField<32, 1, u64> neg_pred_b; + BitField<39, 3, IR::Pred> pred_c; + BitField<42, 1, u64> neg_pred_c; + BitField<44, 1, u64> bf; + BitField<45, 2, BooleanOp> bop_2; + BitField<47, 1, u64> cc; + } const pset{insn}; + + const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)}; + const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)}; + const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)}; + + const IR::U1 res_1{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)}; + const IR::U1 res_2{PredicateCombine(ir, res_1, pred_c, pset.bop_2)}; + + const IR::U32 true_result{pset.bf != 0 ? ir.Imm32(0x3f800000) : ir.Imm32(-1)}; + const IR::U32 zero{ir.Imm32(0)}; + + const IR::U32 result{ir.Select(res_2, true_result, zero)}; + + X(pset.dest_reg, result); + if (pset.cc != 0) { + const IR::U1 is_zero{ir.IEqual(result, zero)}; + SetZFlag(is_zero); + if (pset.bf != 0) { + ResetSFlag(); + } else { + SetSFlag(ir.LogicalNot(is_zero)); + } + ResetOFlag(); + ResetCFlag(); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp new file mode 100644 index 000000000..93baa75a9 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp @@ -0,0 +1,44 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +void SEL(TranslatorVisitor& v, u64 insn, const IR::U32& src) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + } const sel{insn}; + + const IR::U1 pred = v.ir.GetPred(sel.pred); + IR::U32 op_a{v.X(sel.src_reg)}; + IR::U32 op_b{src}; + if (sel.neg_pred != 0) { + std::swap(op_a, op_b); + } + const IR::U32 result{v.ir.Select(pred, op_a, op_b)}; + + v.X(sel.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::SEL_reg(u64 insn) { + SEL(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::SEL_cbuf(u64 insn) { + SEL(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::SEL_imm(u64 insn) { + SEL(*this, insn, GetImm20(insn)); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp new file mode 100644 index 000000000..63b588ad4 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp @@ -0,0 +1,205 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <array> +#include <bit> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Type : u64 { + _1D, + BUFFER_1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, +}; + +enum class Size : u64 { + U32, + S32, + U64, + S64, + F32FTZRN, + F16x2FTZRN, + SD32, + SD64, +}; + +enum class AtomicOp : u64 { + ADD, + MIN, + MAX, + INC, + DEC, + AND, + OR, + XOR, + EXCH, +}; + +enum class Clamp : u64 { + IGN, + Default, + TRAP, +}; + +TextureType GetType(Type type) { + switch (type) { + case Type::_1D: + return TextureType::Color1D; + case Type::BUFFER_1D: + return TextureType::Buffer; + case Type::ARRAY_1D: + return TextureType::ColorArray1D; + case Type::_2D: + return TextureType::Color2D; + case Type::ARRAY_2D: + return TextureType::ColorArray2D; + case Type::_3D: + return TextureType::Color3D; + } + throw NotImplementedException("Invalid type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) { + switch (type) { + case Type::_1D: + case Type::BUFFER_1D: + return v.X(reg); + case Type::_2D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1)); + case Type::_3D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); + default: + break; + } + throw NotImplementedException("Invalid type {}", type); +} + +IR::Value ApplyAtomicOp(IR::IREmitter& ir, const IR::U32& handle, const IR::Value& coords, + const IR::Value& op_b, IR::TextureInstInfo info, AtomicOp op, + bool is_signed) { + switch (op) { + case AtomicOp::ADD: + return ir.ImageAtomicIAdd(handle, coords, op_b, info); + case AtomicOp::MIN: + return ir.ImageAtomicIMin(handle, coords, op_b, is_signed, info); + case AtomicOp::MAX: + return ir.ImageAtomicIMax(handle, coords, op_b, is_signed, info); + case AtomicOp::INC: + return ir.ImageAtomicInc(handle, coords, op_b, info); + case AtomicOp::DEC: + return ir.ImageAtomicDec(handle, coords, op_b, info); + case AtomicOp::AND: + return ir.ImageAtomicAnd(handle, coords, op_b, info); + case AtomicOp::OR: + return ir.ImageAtomicOr(handle, coords, op_b, info); + case AtomicOp::XOR: + return ir.ImageAtomicXor(handle, coords, op_b, info); + case AtomicOp::EXCH: + return ir.ImageAtomicExchange(handle, coords, op_b, info); + default: + throw NotImplementedException("Atomic Operation {}", op); + } +} + +ImageFormat Format(Size size) { + switch (size) { + case Size::U32: + case Size::S32: + case Size::SD32: + return ImageFormat::R32_UINT; + default: + break; + } + throw NotImplementedException("Invalid size {}", size); +} + +bool IsSizeInt32(Size size) { + switch (size) { + case Size::U32: + case Size::S32: + case Size::SD32: + return true; + default: + return false; + } +} + +void ImageAtomOp(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg, IR::Reg coord_reg, + IR::Reg bindless_reg, AtomicOp op, Clamp clamp, Size size, Type type, + u64 bound_offset, bool is_bindless, bool write_result) { + if (clamp != Clamp::IGN) { + throw NotImplementedException("Clamp {}", clamp); + } + if (!IsSizeInt32(size)) { + throw NotImplementedException("Size {}", size); + } + const bool is_signed{size == Size::S32}; + const ImageFormat format{Format(size)}; + const TextureType tex_type{GetType(type)}; + const IR::Value coords{MakeCoords(v, coord_reg, type)}; + + const IR::U32 handle{is_bindless != 0 ? 
v.X(bindless_reg) + : v.ir.Imm32(static_cast<u32>(bound_offset * 4))}; + IR::TextureInstInfo info{}; + info.type.Assign(tex_type); + info.image_format.Assign(format); + + // TODO: float/64-bit operand + const IR::Value op_b{v.X(operand_reg)}; + const IR::Value color{ApplyAtomicOp(v.ir, handle, coords, op_b, info, op, is_signed)}; + + if (write_result) { + v.X(dest_reg, IR::U32{color}); + } +} +} // Anonymous namespace + +void TranslatorVisitor::SUATOM(u64 insn) { + union { + u64 raw; + BitField<54, 1, u64> is_bindless; + BitField<29, 4, AtomicOp> op; + BitField<33, 3, Type> type; + BitField<51, 3, Size> size; + BitField<49, 2, Clamp> clamp; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> operand_reg; + BitField<36, 13, u64> bound_offset; // !is_bindless + BitField<39, 8, IR::Reg> bindless_reg; // is_bindless + } const suatom{insn}; + + ImageAtomOp(*this, suatom.dest_reg, suatom.operand_reg, suatom.coord_reg, suatom.bindless_reg, + suatom.op, suatom.clamp, suatom.size, suatom.type, suatom.bound_offset, + suatom.is_bindless != 0, true); +} + +void TranslatorVisitor::SURED(u64 insn) { + // TODO: confirm offsets + union { + u64 raw; + BitField<51, 1, u64> is_bound; + BitField<21, 3, AtomicOp> op; + BitField<33, 3, Type> type; + BitField<20, 3, Size> size; + BitField<49, 2, Clamp> clamp; + BitField<0, 8, IR::Reg> operand_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<36, 13, u64> bound_offset; // is_bound + BitField<39, 8, IR::Reg> bindless_reg; // !is_bound + } const sured{insn}; + ImageAtomOp(*this, IR::Reg::RZ, sured.operand_reg, sured.coord_reg, sured.bindless_reg, + sured.op, sured.clamp, sured.size, sured.type, sured.bound_offset, + sured.is_bound == 0, false); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp new file mode 100644 index 000000000..681220a8d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp @@ -0,0 +1,281 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <array> +#include <bit> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Type : u64 { + _1D, + BUFFER_1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, +}; + +constexpr unsigned R = 1 << 0; +constexpr unsigned G = 1 << 1; +constexpr unsigned B = 1 << 2; +constexpr unsigned A = 1 << 3; + +constexpr std::array MASK{ + 0U, // + R, // + G, // + R | G, // + B, // + R | B, // + G | B, // + R | G | B, // + A, // + R | A, // + G | A, // + R | G | A, // + B | A, // + R | B | A, // + G | B | A, // + R | G | B | A, // +}; + +enum class Size : u64 { + U8, + S8, + U16, + S16, + B32, + B64, + B128, +}; + +enum class Clamp : u64 { + IGN, + Default, + TRAP, +}; + +// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cache-operators +enum class LoadCache : u64 { + CA, // Cache at all levels, likely to be accessed again + CG, // Cache at global level (L2 and below, not L1) + CI, // ??? 
+ CV, // Don't cache and fetch again (volatile) +}; + +enum class StoreCache : u64 { + WB, // Cache write-back all coherent levels + CG, // Cache at global level (L2 and below, not L1) + CS, // Cache streaming, likely to be accessed once + WT, // Cache write-through (to system memory, volatile?) +}; + +ImageFormat Format(Size size) { + switch (size) { + case Size::U8: + return ImageFormat::R8_UINT; + case Size::S8: + return ImageFormat::R8_SINT; + case Size::U16: + return ImageFormat::R16_UINT; + case Size::S16: + return ImageFormat::R16_SINT; + case Size::B32: + return ImageFormat::R32_UINT; + case Size::B64: + return ImageFormat::R32G32_UINT; + case Size::B128: + return ImageFormat::R32G32B32A32_UINT; + } + throw NotImplementedException("Invalid size {}", size); +} + +int SizeInRegs(Size size) { + switch (size) { + case Size::U8: + case Size::S8: + case Size::U16: + case Size::S16: + case Size::B32: + return 1; + case Size::B64: + return 2; + case Size::B128: + return 4; + } + throw NotImplementedException("Invalid size {}", size); +} + +TextureType GetType(Type type) { + switch (type) { + case Type::_1D: + return TextureType::Color1D; + case Type::BUFFER_1D: + return TextureType::Buffer; + case Type::ARRAY_1D: + return TextureType::ColorArray1D; + case Type::_2D: + return TextureType::Color2D; + case Type::ARRAY_2D: + return TextureType::ColorArray2D; + case Type::_3D: + return TextureType::Color3D; + } + throw NotImplementedException("Invalid type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) { + const auto array{[&](int index) { + return v.ir.BitFieldExtract(v.X(reg + index), v.ir.Imm32(0), v.ir.Imm32(16)); + }}; + switch (type) { + case Type::_1D: + case Type::BUFFER_1D: + return v.X(reg); + case Type::ARRAY_1D: + return v.ir.CompositeConstruct(v.X(reg), array(1)); + case Type::_2D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1)); + case Type::ARRAY_2D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), array(2)); + case Type::_3D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); + } + throw NotImplementedException("Invalid type {}", type); +} + +unsigned SwizzleMask(u64 swizzle) { + if (swizzle == 0 || swizzle >= MASK.size()) { + throw NotImplementedException("Invalid swizzle {}", swizzle); + } + return MASK[swizzle]; +} + +IR::Value MakeColor(IR::IREmitter& ir, IR::Reg reg, int num_regs) { + std::array<IR::U32, 4> colors; + for (int i = 0; i < num_regs; ++i) { + colors[static_cast<size_t>(i)] = ir.GetReg(reg + i); + } + for (int i = num_regs; i < 4; ++i) { + colors[static_cast<size_t>(i)] = ir.Imm32(0); + } + return ir.CompositeConstruct(colors[0], colors[1], colors[2], colors[3]); +} +} // Anonymous namespace + +void TranslatorVisitor::SULD(u64 insn) { + union { + u64 raw; + BitField<51, 1, u64> is_bound; + BitField<52, 1, u64> d; + BitField<23, 1, u64> ba; + BitField<33, 3, Type> type; + BitField<24, 2, LoadCache> cache; + BitField<20, 3, Size> size; // .D + BitField<20, 4, u64> swizzle; // .P + BitField<49, 2, Clamp> clamp; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<36, 13, u64> bound_offset; // is_bound + BitField<39, 8, IR::Reg> bindless_reg; // !is_bound + } const suld{insn}; + + if (suld.clamp != Clamp::IGN) { + throw NotImplementedException("Clamp {}", suld.clamp.Value()); + } + if (suld.cache != LoadCache::CA && suld.cache != LoadCache::CG) { + throw NotImplementedException("Cache {}", suld.cache.Value()); + } + const bool is_typed{suld.d != 0}; + if 
(is_typed && suld.ba != 0) { + throw NotImplementedException("BA"); + } + + const ImageFormat format{is_typed ? Format(suld.size) : ImageFormat::Typeless}; + const TextureType type{GetType(suld.type)}; + const IR::Value coords{MakeCoords(*this, suld.coord_reg, suld.type)}; + const IR::U32 handle{suld.is_bound != 0 ? ir.Imm32(static_cast<u32>(suld.bound_offset * 4)) + : X(suld.bindless_reg)}; + IR::TextureInstInfo info{}; + info.type.Assign(type); + info.image_format.Assign(format); + + const IR::Value result{ir.ImageRead(handle, coords, info)}; + IR::Reg dest_reg{suld.dest_reg}; + if (is_typed) { + const int num_regs{SizeInRegs(suld.size)}; + for (int i = 0; i < num_regs; ++i) { + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))}); + } + } else { + const unsigned mask{SwizzleMask(suld.swizzle)}; + const int bits{std::popcount(mask)}; + if (!IR::IsAligned(dest_reg, bits == 3 ? 4 : static_cast<size_t>(bits))) { + throw NotImplementedException("Unaligned destination register"); + } + for (unsigned component = 0; component < 4; ++component) { + if (((mask >> component) & 1) == 0) { + continue; + } + X(dest_reg, IR::U32{ir.CompositeExtract(result, component)}); + ++dest_reg; + } + } +} + +void TranslatorVisitor::SUST(u64 insn) { + union { + u64 raw; + BitField<51, 1, u64> is_bound; + BitField<52, 1, u64> d; + BitField<23, 1, u64> ba; + BitField<33, 3, Type> type; + BitField<24, 2, StoreCache> cache; + BitField<20, 3, Size> size; // .D + BitField<20, 4, u64> swizzle; // .P + BitField<49, 2, Clamp> clamp; + BitField<0, 8, IR::Reg> data_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<36, 13, u64> bound_offset; // is_bound + BitField<39, 8, IR::Reg> bindless_reg; // !is_bound + } const sust{insn}; + + if (sust.clamp != Clamp::IGN) { + throw NotImplementedException("Clamp {}", sust.clamp.Value()); + } + if (sust.cache != StoreCache::WB && sust.cache != StoreCache::CG) { + throw NotImplementedException("Cache {}", sust.cache.Value()); + } + const bool is_typed{sust.d != 0}; + if (is_typed && sust.ba != 0) { + throw NotImplementedException("BA"); + } + const ImageFormat format{is_typed ? Format(sust.size) : ImageFormat::Typeless}; + const TextureType type{GetType(sust.type)}; + const IR::Value coords{MakeCoords(*this, sust.coord_reg, sust.type)}; + const IR::U32 handle{sust.is_bound != 0 ? ir.Imm32(static_cast<u32>(sust.bound_offset * 4)) + : X(sust.bindless_reg)}; + IR::TextureInstInfo info{}; + info.type.Assign(type); + info.image_format.Assign(format); + + IR::Value color; + if (is_typed) { + color = MakeColor(ir, sust.data_reg, SizeInRegs(sust.size)); + } else { + const unsigned mask{SwizzleMask(sust.swizzle)}; + if (mask != 0xf) { + throw NotImplementedException("Non-full mask"); + } + color = MakeColor(ir, sust.data_reg, 4); + } + ir.ImageWrite(handle, coords, color, info); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp new file mode 100644 index 000000000..0046b5edd --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -0,0 +1,236 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
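+// TEX samples a texture with an optional LOD or bias (Blod), AOFFI offset and depth compare +// (.DC); the TEX.B variant reads the texture handle from a register instead of a constant buffer +// offset.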
+ +#include <optional> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Blod : u64 { + None, + LZ, + LB, + LL, + INVALIDBLOD4, + INVALIDBLOD5, + LBA, + LLA, +}; + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +Shader::TextureType GetType(TextureType type) { + switch (type) { + case TextureType::_1D: + return Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { + const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }}; + switch (type) { + case TextureType::_1D: + return v.F(reg); + case TextureType::ARRAY_1D: + return v.ir.CompositeConstruct(v.F(reg + 1), read_array()); + case TextureType::_2D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array()); + case TextureType::_3D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_CUBE: + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array()); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) { + switch (blod) { + case Blod::None: + return v.ir.Imm32(0.0f); + case Blod::LZ: + return v.ir.Imm32(0.0f); + case Blod::LB: + case Blod::LL: + case Blod::LBA: + case Blod::LLA: + return v.F(reg++); + case Blod::INVALIDBLOD4: + case Blod::INVALIDBLOD5: + break; + } + throw NotImplementedException("Invalid blod {}", blod); +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { + const IR::U32 value{v.X(reg++)}; + switch (type) { + case TextureType::_1D: + case TextureType::ARRAY_1D: + return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true); + case TextureType::_2D: + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); + case TextureType::_3D: + case TextureType::ARRAY_3D: + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true)); + case TextureType::CUBE: + case TextureType::ARRAY_CUBE: + throw NotImplementedException("Illegal offset on CUBE sample"); + } + throw NotImplementedException("Invalid texture type {}", type); +} + 
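+// Example of the AOFFI packing handled by MakeOffset above: for a 2D access, an operand of 0x21 +// decodes to offsets (x=+1, y=+2); each 4-bit field is sign-extended, so 0xF encodes -1.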
+bool HasExplicitLod(Blod blod) { + switch (blod) { + case Blod::LL: + case Blod::LLA: + case Blod::LZ: + return true; + default: + return false; + } +} + +void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, + std::optional<u32> cbuf_offset) { + union { + u64 raw; + BitField<35, 1, u64> ndv; + BitField<49, 1, u64> nodep; + BitField<50, 1, u64> dc; + BitField<51, 3, IR::Pred> sparse_pred; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> meta_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + } const tex{insn}; + + if (lc) { + throw NotImplementedException("LC"); + } + const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)}; + + IR::Reg meta_reg{tex.meta_reg}; + IR::Value handle; + IR::Value offset; + IR::F32 dref; + IR::F32 lod_clamp; + if (cbuf_offset) { + handle = v.ir.Imm32(*cbuf_offset); + } else { + handle = v.X(meta_reg++); + } + const IR::F32 lod{MakeLod(v, meta_reg, blod)}; + if (aoffi) { + offset = MakeOffset(v, meta_reg, tex.type); + } + if (tex.dc != 0) { + dref = v.F(meta_reg++); + } + IR::TextureInstInfo info{}; + info.type.Assign(GetType(tex.type)); + info.is_depth.Assign(tex.dc != 0 ? 1 : 0); + info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0); + info.has_lod_clamp.Assign(lc ? 1 : 0); + + const IR::Value sample{[&]() -> IR::Value { + if (tex.dc == 0) { + if (HasExplicitLod(blod)) { + return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, info); + } else { + return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info); + } + } + if (HasExplicitLod(blod)) { + return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, info); + } else { + return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp, + info); + } + }()}; + + IR::Reg dest_reg{tex.dest_reg}; + for (int element = 0; element < 4; ++element) { + if (((tex.mask >> element) & 1) == 0) { + continue; + } + IR::F32 value; + if (tex.dc != 0) { + value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f); + } else { + value = IR::F32{v.ir.CompositeExtract(sample, static_cast<size_t>(element))}; + } + v.F(dest_reg, value); + ++dest_reg; + } + if (tex.sparse_pred != IR::Pred::PT) { + v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); + } +} +} // Anonymous namespace + +void TranslatorVisitor::TEX(u64 insn) { + union { + u64 raw; + BitField<54, 1, u64> aoffi; + BitField<55, 3, Blod> blod; + BitField<58, 1, u64> lc; + BitField<36, 13, u64> cbuf_offset; + } const tex{insn}; + + Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast<u32>(tex.cbuf_offset * 4)); +} + +void TranslatorVisitor::TEX_b(u64 insn) { + union { + u64 raw; + BitField<36, 1, u64> aoffi; + BitField<37, 3, Blod> blod; + BitField<40, 1, u64> lc; + } const tex{insn}; + + Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp new file mode 100644 index 000000000..154e7f1a1 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp @@ -0,0 +1,266 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
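+// TEXS is the swizzled form of TEX: addressing is selected by a small encoding field, and the +// sampled components chosen through RG_LUT/RGBA_LUT are stored either as 32-bit floats or packed +// into half-float pairs when the F16 precision bit is set.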
+ +#include <utility> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Precision : u64 { + F16, + F32, +}; + +union Encoding { + u64 raw; + BitField<59, 1, Precision> precision; + BitField<53, 4, u64> encoding; + BitField<49, 1, u64> nodep; + BitField<28, 8, IR::Reg> dest_reg_b; + BitField<0, 8, IR::Reg> dest_reg_a; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<20, 8, IR::Reg> src_reg_b; + BitField<36, 13, u64> cbuf_offset; + BitField<50, 3, u64> swizzle; +}; + +constexpr unsigned R = 1; +constexpr unsigned G = 2; +constexpr unsigned B = 4; +constexpr unsigned A = 8; + +constexpr std::array RG_LUT{ + R, // + G, // + B, // + A, // + R | G, // + R | A, // + G | A, // + B | A, // +}; + +constexpr std::array RGBA_LUT{ + R | G | B, // + R | G | A, // + R | B | A, // + G | B | A, // + R | G | B | A, // +}; + +void CheckAlignment(IR::Reg reg, size_t alignment) { + if (!IR::IsAligned(reg, alignment)) { + throw NotImplementedException("Unaligned source register {}", reg); + } +} + +template <typename... Args> +IR::Value Composite(TranslatorVisitor& v, Args... regs) { + return v.ir.CompositeConstruct(v.F(regs)...); +} + +IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) { + return v.ir.ConvertUToF(32, 16, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16))); +} + +IR::Value Sample(TranslatorVisitor& v, u64 insn) { + const Encoding texs{insn}; + const IR::U32 handle{v.ir.Imm32(static_cast<u32>(texs.cbuf_offset * 4))}; + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::Reg reg_a{texs.src_reg_a}; + const IR::Reg reg_b{texs.src_reg_b}; + IR::TextureInstInfo info{}; + if (texs.precision == Precision::F16) { + info.relaxed_precision.Assign(1); + } + switch (texs.encoding) { + case 0: // 1D.LZ + info.type.Assign(TextureType::Color1D); + return v.ir.ImageSampleExplicitLod(handle, v.F(reg_a), zero, {}, info); + case 1: // 2D + info.type.Assign(TextureType::Color2D); + return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_b), {}, {}, {}, info); + case 2: // 2D.LZ + info.type.Assign(TextureType::Color2D); + return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_b), zero, {}, info); + case 3: // 2D.LL + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::Color2D); + return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), {}, + info); + case 4: // 2D.DC + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::Color2D); + info.is_depth.Assign(1); + return v.ir.ImageSampleDrefImplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), + {}, {}, {}, info); + case 5: // 2D.LL.DC + CheckAlignment(reg_a, 2); + CheckAlignment(reg_b, 2); + info.type.Assign(TextureType::Color2D); + info.is_depth.Assign(1); + return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), + v.F(reg_b + 1), v.F(reg_b), {}, info); + case 6: // 2D.LZ.DC + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::Color2D); + info.is_depth.Assign(1); + return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), + zero, {}, info); + case 7: // ARRAY_2D + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::ColorArray2D); + return v.ir.ImageSampleImplicitLod( + handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), + {}, {}, {}, info); + case 8: // ARRAY_2D.LZ + 
CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::ColorArray2D); + return v.ir.ImageSampleExplicitLod( + handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), + zero, {}, info); + case 9: // ARRAY_2D.LZ.DC + CheckAlignment(reg_a, 2); + CheckAlignment(reg_b, 2); + info.type.Assign(TextureType::ColorArray2D); + info.is_depth.Assign(1); + return v.ir.ImageSampleDrefExplicitLod( + handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), + v.F(reg_b + 1), zero, {}, info); + case 10: // 3D + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::Color3D); + return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {}, + {}, info); + case 11: // 3D.LZ + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::Color3D); + return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), zero, {}, + info); + case 12: // CUBE + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::ColorCube); + return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {}, + {}, info); + case 13: // CUBE.LL + CheckAlignment(reg_a, 2); + CheckAlignment(reg_b, 2); + info.type.Assign(TextureType::ColorCube); + return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), + v.F(reg_b + 1), {}, info); + default: + throw NotImplementedException("Illegal encoding {}", texs.encoding.Value()); + } +} + +unsigned Swizzle(u64 insn) { + const Encoding texs{insn}; + const size_t encoding{texs.swizzle}; + if (texs.dest_reg_b == IR::Reg::RZ) { + if (encoding >= RG_LUT.size()) { + throw NotImplementedException("Illegal RG encoding {}", encoding); + } + return RG_LUT[encoding]; + } else { + if (encoding >= RGBA_LUT.size()) { + throw NotImplementedException("Illegal RGBA encoding {}", encoding); + } + return RGBA_LUT[encoding]; + } +} + +IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) { + const bool is_shadow{sample.Type() == IR::Type::F32}; + if (is_shadow) { + const bool is_alpha{component == 3}; + return is_alpha ? 
v.ir.Imm32(1.0f) : IR::F32{sample}; + } else { + return IR::F32{v.ir.CompositeExtract(sample, component)}; + } +} + +IR::Reg RegStoreComponent32(u64 insn, unsigned index) { + const Encoding texs{insn}; + switch (index) { + case 0: + return texs.dest_reg_a; + case 1: + CheckAlignment(texs.dest_reg_a, 2); + return texs.dest_reg_a + 1; + case 2: + return texs.dest_reg_b; + case 3: + CheckAlignment(texs.dest_reg_b, 2); + return texs.dest_reg_b + 1; + } + throw LogicError("Invalid store index {}", index); +} + +void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + const unsigned swizzle{Swizzle(insn)}; + unsigned store_index{0}; + for (unsigned component = 0; component < 4; ++component) { + if (((swizzle >> component) & 1) == 0) { + continue; + } + const IR::Reg dest{RegStoreComponent32(insn, store_index)}; + v.F(dest, Extract(v, sample, component)); + ++store_index; + } +} + +IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) { + return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs)); +} + +void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + const unsigned swizzle{Swizzle(insn)}; + unsigned store_index{0}; + std::array<IR::F32, 4> swizzled; + for (unsigned component = 0; component < 4; ++component) { + if (((swizzle >> component) & 1) == 0) { + continue; + } + swizzled[store_index] = Extract(v, sample, component); + ++store_index; + } + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const Encoding texs{insn}; + switch (store_index) { + case 1: + v.X(texs.dest_reg_a, Pack(v, swizzled[0], zero)); + break; + case 2: + case 3: + case 4: + v.X(texs.dest_reg_a, Pack(v, swizzled[0], swizzled[1])); + switch (store_index) { + case 2: + break; + case 3: + v.X(texs.dest_reg_b, Pack(v, swizzled[2], zero)); + break; + case 4: + v.X(texs.dest_reg_b, Pack(v, swizzled[2], swizzled[3])); + break; + } + break; + } +} +} // Anonymous namespace + +void TranslatorVisitor::TEXS(u64 insn) { + const IR::Value sample{Sample(*this, insn)}; + if (Encoding{insn}.precision == Precision::F32) { + Store32(*this, insn, sample); + } else { + Store16(*this, insn, sample); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp new file mode 100644 index 000000000..218cbc1a8 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp @@ -0,0 +1,208 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
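+// TLD4 is a texture gather: it fetches the component selected by ComponentType (R/G/B/A) through +// ImageGather, with optional AOFFI or per-texel (PTP) offsets and depth compare; TLD4.B takes a +// bindless handle from a register.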
+ +#include <optional> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +enum class OffsetType : u64 { + None = 0, + AOFFI, + PTP, + Invalid, +}; + +enum class ComponentType : u64 { + R = 0, + G = 1, + B = 2, + A = 3, +}; + +Shader::TextureType GetType(TextureType type) { + switch (type) { + case TextureType::_1D: + return Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { + const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }}; + switch (type) { + case TextureType::_1D: + return v.F(reg); + case TextureType::ARRAY_1D: + return v.ir.CompositeConstruct(v.F(reg + 1), read_array()); + case TextureType::_2D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array()); + case TextureType::_3D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_CUBE: + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array()); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { + const IR::U32 value{v.X(reg++)}; + switch (type) { + case TextureType::_1D: + case TextureType::ARRAY_1D: + return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true); + case TextureType::_2D: + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true)); + case TextureType::_3D: + case TextureType::ARRAY_3D: + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(16), v.ir.Imm32(6), true)); + case TextureType::CUBE: + case TextureType::ARRAY_CUBE: + throw NotImplementedException("Illegal offset on CUBE sample"); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +std::pair<IR::Value, IR::Value> MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) { + const IR::U32 value1{v.X(reg++)}; + const IR::U32 value2{v.X(reg++)}; + const IR::U32 bitsize{v.ir.Imm32(6)}; + const auto make_vector{[&v, &bitsize](const IR::U32& value) { + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), bitsize, 
true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), bitsize, true), + v.ir.BitFieldExtract(value, v.ir.Imm32(16), bitsize, true), + v.ir.BitFieldExtract(value, v.ir.Imm32(24), bitsize, true)); + }}; + return {make_vector(value1), make_vector(value2)}; +} + +void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type, + bool is_bindless) { + union { + u64 raw; + BitField<35, 1, u64> ndv; + BitField<49, 1, u64> nodep; + BitField<50, 1, u64> dc; + BitField<51, 3, IR::Pred> sparse_pred; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> meta_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + BitField<36, 13, u64> cbuf_offset; + } const tld4{insn}; + + const IR::Value coords{MakeCoords(v, tld4.coord_reg, tld4.type)}; + + IR::Reg meta_reg{tld4.meta_reg}; + IR::Value handle; + IR::Value offset; + IR::Value offset2; + IR::F32 dref; + if (!is_bindless) { + handle = v.ir.Imm32(static_cast<u32>(tld4.cbuf_offset.Value() * 4)); + } else { + handle = v.X(meta_reg++); + } + switch (offset_type) { + case OffsetType::None: + break; + case OffsetType::AOFFI: + offset = MakeOffset(v, meta_reg, tld4.type); + break; + case OffsetType::PTP: + std::tie(offset, offset2) = MakeOffsetPTP(v, meta_reg); + break; + default: + throw NotImplementedException("Invalid offset type {}", offset_type); + } + if (tld4.dc != 0) { + dref = v.F(meta_reg++); + } + IR::TextureInstInfo info{}; + info.type.Assign(GetType(tld4.type)); + info.is_depth.Assign(tld4.dc != 0 ? 1 : 0); + info.gather_component.Assign(static_cast<u32>(component_type)); + const IR::Value sample{[&] { + if (tld4.dc == 0) { + return v.ir.ImageGather(handle, coords, offset, offset2, info); + } + return v.ir.ImageGatherDref(handle, coords, offset, offset2, dref, info); + }()}; + + IR::Reg dest_reg{tld4.dest_reg}; + for (size_t element = 0; element < 4; ++element) { + if (((tld4.mask >> element) & 1) == 0) { + continue; + } + v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)}); + ++dest_reg; + } + if (tld4.sparse_pred != IR::Pred::PT) { + v.ir.SetPred(tld4.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); + } +} +} // Anonymous namespace + +void TranslatorVisitor::TLD4(u64 insn) { + union { + u64 raw; + BitField<56, 2, ComponentType> component; + BitField<54, 2, OffsetType> offset; + } const tld4{insn}; + Impl(*this, insn, tld4.component, tld4.offset, false); +} + +void TranslatorVisitor::TLD4_b(u64 insn) { + union { + u64 raw; + BitField<38, 2, ComponentType> component; + BitField<36, 2, OffsetType> offset; + } const tld4{insn}; + Impl(*this, insn, tld4.component, tld4.offset, true); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp new file mode 100644 index 000000000..34efa2d50 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp @@ -0,0 +1,134 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
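+// TLD4S is the swizzled form of TLD4: a 2D gather through a constant-buffer handle, with +// optional AOFFI offset and depth compare, storing the result as floats or half-float pairs.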
+ +#include <utility> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Precision : u64 { + F32, + F16, +}; + +enum class ComponentType : u64 { + R = 0, + G = 1, + B = 2, + A = 3, +}; + +union Encoding { + u64 raw; + BitField<55, 1, Precision> precision; + BitField<52, 2, ComponentType> component_type; + BitField<51, 1, u64> aoffi; + BitField<50, 1, u64> dc; + BitField<49, 1, u64> nodep; + BitField<28, 8, IR::Reg> dest_reg_b; + BitField<0, 8, IR::Reg> dest_reg_a; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<20, 8, IR::Reg> src_reg_b; + BitField<36, 13, u64> cbuf_offset; +}; + +void CheckAlignment(IR::Reg reg, size_t alignment) { + if (!IR::IsAligned(reg, alignment)) { + throw NotImplementedException("Unaligned source register {}", reg); + } +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) { + const IR::U32 value{v.X(reg)}; + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true)); +} + +IR::Value Sample(TranslatorVisitor& v, u64 insn) { + const Encoding tld4s{insn}; + const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tld4s.cbuf_offset * 4))}; + const IR::Reg reg_a{tld4s.src_reg_a}; + const IR::Reg reg_b{tld4s.src_reg_b}; + IR::TextureInstInfo info{}; + if (tld4s.precision == Precision::F16) { + info.relaxed_precision.Assign(1); + } + info.gather_component.Assign(static_cast<u32>(tld4s.component_type.Value())); + info.type.Assign(Shader::TextureType::Color2D); + info.is_depth.Assign(tld4s.dc != 0 ? 1 : 0); + IR::Value coords; + if (tld4s.aoffi != 0) { + CheckAlignment(reg_a, 2); + coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1)); + IR::Value offset = MakeOffset(v, reg_b); + if (tld4s.dc != 0) { + CheckAlignment(reg_b, 2); + IR::F32 dref = v.F(reg_b + 1); + return v.ir.ImageGatherDref(handle, coords, offset, {}, dref, info); + } + return v.ir.ImageGather(handle, coords, offset, {}, info); + } + if (tld4s.dc != 0) { + CheckAlignment(reg_a, 2); + coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1)); + IR::F32 dref = v.F(reg_b); + return v.ir.ImageGatherDref(handle, coords, {}, {}, dref, info); + } + coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_b)); + return v.ir.ImageGather(handle, coords, {}, {}, info); +} + +IR::Reg RegStoreComponent32(u64 insn, size_t index) { + const Encoding tlds4{insn}; + switch (index) { + case 0: + return tlds4.dest_reg_a; + case 1: + CheckAlignment(tlds4.dest_reg_a, 2); + return tlds4.dest_reg_a + 1; + case 2: + return tlds4.dest_reg_b; + case 3: + CheckAlignment(tlds4.dest_reg_b, 2); + return tlds4.dest_reg_b + 1; + } + throw LogicError("Invalid store index {}", index); +} + +void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + for (size_t component = 0; component < 4; ++component) { + const IR::Reg dest{RegStoreComponent32(insn, component)}; + v.F(dest, IR::F32{v.ir.CompositeExtract(sample, component)}); + } +} + +IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) { + return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs)); +} + +void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + std::array<IR::F32, 4> swizzled; + for (size_t component = 0; component < 4; ++component) { + swizzled[component] = IR::F32{v.ir.CompositeExtract(sample, component)}; + } + const 
Encoding tld4s{insn}; + v.X(tld4s.dest_reg_a, Pack(v, swizzled[0], swizzled[1])); + v.X(tld4s.dest_reg_b, Pack(v, swizzled[2], swizzled[3])); +} +} // Anonymous namespace + +void TranslatorVisitor::TLD4S(u64 insn) { + const IR::Value sample{Sample(*this, insn)}; + if (Encoding{insn}.precision == Precision::F32) { + Store32(*this, insn, sample); + } else { + Store16(*this, insn, sample); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp new file mode 100644 index 000000000..c3fe3ffda --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp @@ -0,0 +1,182 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <optional> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +Shader::TextureType GetType(TextureType type) { + switch (type) { + case TextureType::_1D: + return Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg, bool has_lod_clamp) { + const IR::U32 value{v.X(reg)}; + const u32 base{has_lod_clamp ? 12U : 16U}; + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(base), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(base + 4), v.ir.Imm32(4), true)); +} + +void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { + union { + u64 raw; + BitField<49, 1, u64> nodep; + BitField<35, 1, u64> aoffi; + BitField<50, 1, u64> lc; + BitField<51, 3, IR::Pred> sparse_pred; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> derivate_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + BitField<36, 13, u64> cbuf_offset; + } const txd{insn}; + + const bool has_lod_clamp = txd.lc != 0; + if (has_lod_clamp) { + throw NotImplementedException("TXD.LC - CLAMP is not implemented"); + } + + IR::Value coords; + u32 num_derivates{}; + IR::Reg base_reg{txd.coord_reg}; + IR::Reg last_reg; + IR::Value handle; + if (is_bindless) { + handle = v.X(base_reg++); + } else { + handle = v.ir.Imm32(static_cast<u32>(txd.cbuf_offset.Value() * 4)); + } + + const auto read_array{[&]() -> IR::F32 { + const IR::U32 base{v.ir.Imm32(0)}; + const IR::U32 count{v.ir.Imm32(has_lod_clamp ? 
12 : 16)}; + const IR::U32 array_index{v.ir.BitFieldExtract(v.X(last_reg), base, count)}; + return v.ir.ConvertUToF(32, 16, array_index); + }}; + switch (txd.type) { + case TextureType::_1D: { + coords = v.F(base_reg); + num_derivates = 1; + last_reg = base_reg + 1; + break; + } + case TextureType::ARRAY_1D: { + last_reg = base_reg + 1; + coords = v.ir.CompositeConstruct(v.F(base_reg), read_array()); + num_derivates = 1; + break; + } + case TextureType::_2D: { + last_reg = base_reg + 2; + coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1)); + num_derivates = 2; + break; + } + case TextureType::ARRAY_2D: { + last_reg = base_reg + 2; + coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1), read_array()); + num_derivates = 2; + break; + } + default: + throw NotImplementedException("Invalid texture type"); + } + + const IR::Reg derivate_reg{txd.derivate_reg}; + IR::Value derivates; + switch (num_derivates) { + case 1: { + derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1)); + break; + } + case 2: { + derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1), + v.F(derivate_reg + 2), v.F(derivate_reg + 3)); + break; + } + default: + throw NotImplementedException("Invalid texture type"); + } + + IR::Value offset; + if (txd.aoffi != 0) { + offset = MakeOffset(v, last_reg, has_lod_clamp); + } + + IR::F32 lod_clamp; + if (has_lod_clamp) { + // Lod Clamp is a Fixed Point 4.8, we need to transform it to float. + // to convert a fixed point, float(value) / float(1 << fixed_point) + // in this case the fixed_point is 8. + const IR::F32 conv4_8fixp_f{v.ir.Imm32(static_cast<f32>(1U << 8))}; + const IR::F32 fixp_lc{v.ir.ConvertUToF( + 32, 16, v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(20), v.ir.Imm32(12)))}; + lod_clamp = v.ir.FPMul(fixp_lc, conv4_8fixp_f); + } + + IR::TextureInstInfo info{}; + info.type.Assign(GetType(txd.type)); + info.num_derivates.Assign(num_derivates); + info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0); + const IR::Value sample{v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info)}; + + IR::Reg dest_reg{txd.dest_reg}; + for (size_t element = 0; element < 4; ++element) { + if (((txd.mask >> element) & 1) == 0) { + continue; + } + v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)}); + ++dest_reg; + } + if (txd.sparse_pred != IR::Pred::PT) { + v.ir.SetPred(txd.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); + } +} +} // Anonymous namespace + +void TranslatorVisitor::TXD(u64 insn) { + Impl(*this, insn, false); +} + +void TranslatorVisitor::TXD_b(u64 insn) { + Impl(*this, insn, true); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp new file mode 100644 index 000000000..983058303 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp @@ -0,0 +1,165 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
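+// TLD fetches a texel with integer coordinates through ImageFetch, taking an optional LOD, AOFFI +// offset and multisample index from the meta register; TLD.CL (clamp) is not implemented.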
+ +#include <optional> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +Shader::TextureType GetType(TextureType type) { + switch (type) { + case TextureType::_1D: + return Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { + const auto read_array{ + [&]() -> IR::U32 { return v.ir.BitFieldExtract(v.X(reg), v.ir.Imm32(0), v.ir.Imm32(16)); }}; + switch (type) { + case TextureType::_1D: + return v.X(reg); + case TextureType::ARRAY_1D: + return v.ir.CompositeConstruct(v.X(reg + 1), read_array()); + case TextureType::_2D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1)); + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), read_array()); + case TextureType::_3D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); + case TextureType::ARRAY_CUBE: + return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), v.X(reg + 3), read_array()); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { + const IR::U32 value{v.X(reg++)}; + switch (type) { + case TextureType::_1D: + case TextureType::ARRAY_1D: + return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true); + case TextureType::_2D: + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); + case TextureType::_3D: + case TextureType::ARRAY_3D: + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true)); + case TextureType::CUBE: + case TextureType::ARRAY_CUBE: + throw NotImplementedException("Illegal offset on CUBE sample"); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { + union { + u64 raw; + BitField<49, 1, u64> nodep; + BitField<55, 1, u64> lod; + BitField<50, 1, u64> multisample; + BitField<35, 1, u64> aoffi; + BitField<54, 1, u64> clamp; + BitField<51, 3, IR::Pred> sparse_pred; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> meta_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + BitField<36, 
13, u64> cbuf_offset; + } const tld{insn}; + + const IR::Value coords{MakeCoords(v, tld.coord_reg, tld.type)}; + + IR::Reg meta_reg{tld.meta_reg}; + IR::Value handle; + IR::Value offset; + IR::U32 lod; + IR::U32 multisample; + if (is_bindless) { + handle = v.X(meta_reg++); + } else { + handle = v.ir.Imm32(static_cast<u32>(tld.cbuf_offset.Value() * 4)); + } + if (tld.lod != 0) { + lod = v.X(meta_reg++); + } else { + lod = v.ir.Imm32(0U); + } + if (tld.aoffi != 0) { + offset = MakeOffset(v, meta_reg, tld.type); + } + if (tld.multisample != 0) { + multisample = v.X(meta_reg++); + } + if (tld.clamp != 0) { + throw NotImplementedException("TLD.CL - CLAMP is not implemented"); + } + IR::TextureInstInfo info{}; + info.type.Assign(GetType(tld.type)); + const IR::Value sample{v.ir.ImageFetch(handle, coords, offset, lod, multisample, info)}; + + IR::Reg dest_reg{tld.dest_reg}; + for (size_t element = 0; element < 4; ++element) { + if (((tld.mask >> element) & 1) == 0) { + continue; + } + v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)}); + ++dest_reg; + } + if (tld.sparse_pred != IR::Pred::PT) { + v.ir.SetPred(tld.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); + } +} +} // Anonymous namespace + +void TranslatorVisitor::TLD(u64 insn) { + Impl(*this, insn, false); +} + +void TranslatorVisitor::TLD_b(u64 insn) { + Impl(*this, insn, true); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp new file mode 100644 index 000000000..5dd7e31b2 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp @@ -0,0 +1,242 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included.
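+// TLDS is the swizzled form of TLD: the addressing mode is selected by a 4-bit encoding field and +// the fetched components are stored either as 32-bit floats or packed into half-float pairs.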
+ +#include <array> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Precision : u64 { + F16, + F32, +}; + +constexpr unsigned R = 1; +constexpr unsigned G = 2; +constexpr unsigned B = 4; +constexpr unsigned A = 8; + +constexpr std::array RG_LUT{ + R, // + G, // + B, // + A, // + R | G, // + R | A, // + G | A, // + B | A, // +}; + +constexpr std::array RGBA_LUT{ + R | G | B, // + R | G | A, // + R | B | A, // + G | B | A, // + R | G | B | A, // +}; + +union Encoding { + u64 raw; + BitField<59, 1, Precision> precision; + BitField<54, 1, u64> aoffi; + BitField<53, 1, u64> lod; + BitField<55, 1, u64> ms; + BitField<49, 1, u64> nodep; + BitField<28, 8, IR::Reg> dest_reg_b; + BitField<0, 8, IR::Reg> dest_reg_a; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<20, 8, IR::Reg> src_reg_b; + BitField<36, 13, u64> cbuf_offset; + BitField<50, 3, u64> swizzle; + BitField<53, 4, u64> encoding; +}; + +void CheckAlignment(IR::Reg reg, size_t alignment) { + if (!IR::IsAligned(reg, alignment)) { + throw NotImplementedException("Unaligned source register {}", reg); + } +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) { + const IR::U32 value{v.X(reg)}; + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); +} + +IR::Value Sample(TranslatorVisitor& v, u64 insn) { + const Encoding tlds{insn}; + const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tlds.cbuf_offset * 4))}; + const IR::Reg reg_a{tlds.src_reg_a}; + const IR::Reg reg_b{tlds.src_reg_b}; + IR::Value coords; + IR::U32 lod{v.ir.Imm32(0U)}; + IR::Value offsets; + IR::U32 multisample; + Shader::TextureType texture_type{}; + switch (tlds.encoding) { + case 0: + texture_type = Shader::TextureType::Color1D; + coords = v.X(reg_a); + break; + case 1: + texture_type = Shader::TextureType::Color1D; + coords = v.X(reg_a); + lod = v.X(reg_b); + break; + case 2: + texture_type = Shader::TextureType::Color2D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_b)); + break; + case 4: + CheckAlignment(reg_a, 2); + texture_type = Shader::TextureType::Color2D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); + offsets = MakeOffset(v, reg_b); + break; + case 5: + CheckAlignment(reg_a, 2); + texture_type = Shader::TextureType::Color2D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); + lod = v.X(reg_b); + break; + case 6: + CheckAlignment(reg_a, 2); + texture_type = Shader::TextureType::Color2D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); + multisample = v.X(reg_b); + break; + case 7: + CheckAlignment(reg_a, 2); + texture_type = Shader::TextureType::Color3D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1), v.X(reg_b)); + break; + case 8: { + CheckAlignment(reg_b, 2); + const IR::U32 array{v.ir.BitFieldExtract(v.X(reg_a), v.ir.Imm32(0), v.ir.Imm32(16))}; + texture_type = Shader::TextureType::ColorArray2D; + coords = v.ir.CompositeConstruct(v.X(reg_b), v.X(reg_b + 1), array); + break; + } + case 12: + CheckAlignment(reg_a, 2); + CheckAlignment(reg_b, 2); + texture_type = Shader::TextureType::Color2D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); + lod = v.X(reg_b); + offsets = MakeOffset(v, reg_b + 1); + break; + default: + throw NotImplementedException("Illegal encoding {}", 
tlds.encoding.Value()); + } + IR::TextureInstInfo info{}; + if (tlds.precision == Precision::F16) { + info.relaxed_precision.Assign(1); + } + info.type.Assign(texture_type); + return v.ir.ImageFetch(handle, coords, offsets, lod, multisample, info); +} + +unsigned Swizzle(u64 insn) { + const Encoding tlds{insn}; + const size_t encoding{tlds.swizzle}; + if (tlds.dest_reg_b == IR::Reg::RZ) { + if (encoding >= RG_LUT.size()) { + throw NotImplementedException("Illegal RG encoding {}", encoding); + } + return RG_LUT[encoding]; + } else { + if (encoding >= RGBA_LUT.size()) { + throw NotImplementedException("Illegal RGBA encoding {}", encoding); + } + return RGBA_LUT[encoding]; + } +} + +IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) { + return IR::F32{v.ir.CompositeExtract(sample, component)}; +} + +IR::Reg RegStoreComponent32(u64 insn, unsigned index) { + const Encoding tlds{insn}; + switch (index) { + case 0: + return tlds.dest_reg_a; + case 1: + CheckAlignment(tlds.dest_reg_a, 2); + return tlds.dest_reg_a + 1; + case 2: + return tlds.dest_reg_b; + case 3: + CheckAlignment(tlds.dest_reg_b, 2); + return tlds.dest_reg_b + 1; + } + throw LogicError("Invalid store index {}", index); +} + +void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + const unsigned swizzle{Swizzle(insn)}; + unsigned store_index{0}; + for (unsigned component = 0; component < 4; ++component) { + if (((swizzle >> component) & 1) == 0) { + continue; + } + const IR::Reg dest{RegStoreComponent32(insn, store_index)}; + v.F(dest, Extract(v, sample, component)); + ++store_index; + } +} + +IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) { + return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs)); +} + +void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + const unsigned swizzle{Swizzle(insn)}; + unsigned store_index{0}; + std::array<IR::F32, 4> swizzled; + for (unsigned component = 0; component < 4; ++component) { + if (((swizzle >> component) & 1) == 0) { + continue; + } + swizzled[store_index] = Extract(v, sample, component); + ++store_index; + } + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const Encoding tlds{insn}; + switch (store_index) { + case 1: + v.X(tlds.dest_reg_a, Pack(v, swizzled[0], zero)); + break; + case 2: + case 3: + case 4: + v.X(tlds.dest_reg_a, Pack(v, swizzled[0], swizzled[1])); + switch (store_index) { + case 2: + break; + case 3: + v.X(tlds.dest_reg_b, Pack(v, swizzled[2], zero)); + break; + case 4: + v.X(tlds.dest_reg_b, Pack(v, swizzled[2], swizzled[3])); + break; + } + break; + } +} +} // Anonymous namespace + +void TranslatorVisitor::TLDS(u64 insn) { + const IR::Value sample{Sample(*this, insn)}; + if (Encoding{insn}.precision == Precision::F32) { + Store32(*this, insn, sample); + } else { + Store16(*this, insn, sample); + } +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp new file mode 100644 index 000000000..aea3c0e62 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp @@ -0,0 +1,131 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
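+// TMML queries the computed LOD through ImageQueryLod; the first two result components are +// converted to integers and shifted left by 8 before being stored, the remaining ones are written +// as floats.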
+ +#include <optional> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +Shader::TextureType GetType(TextureType type) { + switch (type) { + case TextureType::_1D: + return Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { + // The ISA reads an array component here, but this is not needed on high level shading languages + // We are dropping this information. + switch (type) { + case TextureType::_1D: + return v.F(reg); + case TextureType::ARRAY_1D: + return v.F(reg + 1); + case TextureType::_2D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2)); + case TextureType::_3D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_CUBE: + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { + union { + u64 raw; + BitField<49, 1, u64> nodep; + BitField<35, 1, u64> ndv; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> meta_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + BitField<36, 13, u64> cbuf_offset; + } const tmml{insn}; + + if ((tmml.mask & 0b1100) != 0) { + throw NotImplementedException("TMML BA results are not implmented"); + } + const IR::Value coords{MakeCoords(v, tmml.coord_reg, tmml.type)}; + + IR::U32 handle; + IR::Reg meta_reg{tmml.meta_reg}; + if (is_bindless) { + handle = v.X(meta_reg++); + } else { + handle = v.ir.Imm32(static_cast<u32>(tmml.cbuf_offset.Value() * 4)); + } + IR::TextureInstInfo info{}; + info.type.Assign(GetType(tmml.type)); + const IR::Value sample{v.ir.ImageQueryLod(handle, coords, info)}; + + IR::Reg dest_reg{tmml.dest_reg}; + for (size_t element = 0; element < 4; ++element) { + if (((tmml.mask >> element) & 1) == 0) { + continue; + } + IR::F32 value{v.ir.CompositeExtract(sample, element)}; + if (element < 2) { + IR::U32 casted_value; + if (element == 0) { + casted_value = v.ir.ConvertFToU(32, value); + } else { + casted_value = v.ir.ConvertFToS(16, value); + } + v.X(dest_reg, v.ir.ShiftLeftLogical(casted_value, v.ir.Imm32(8))); + } else { + v.F(dest_reg, value); + } + ++dest_reg; + } +} +} // Anonymous namespace + +void TranslatorVisitor::TMML(u64 insn) { + Impl(*this, insn, false); 
+} + +void TranslatorVisitor::TMML_b(u64 insn) { + Impl(*this, insn, true); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp new file mode 100644 index 000000000..0459e5473 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp @@ -0,0 +1,76 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <optional> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + Dimension = 1, + TextureType = 2, + SamplePos = 5, +}; + +IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg) { + switch (mode) { + case Mode::Dimension: { + const IR::U32 lod{v.X(src_reg)}; + return v.ir.ImageQueryDimension(handle, lod); + } + case Mode::TextureType: + case Mode::SamplePos: + default: + throw NotImplementedException("Mode {}", mode); + } +} + +void Impl(TranslatorVisitor& v, u64 insn, std::optional<u32> cbuf_offset) { + union { + u64 raw; + BitField<49, 1, u64> nodep; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<22, 3, Mode> mode; + BitField<31, 4, u64> mask; + } const txq{insn}; + + IR::Reg src_reg{txq.src_reg}; + IR::U32 handle; + if (cbuf_offset) { + handle = v.ir.Imm32(*cbuf_offset); + } else { + handle = v.X(src_reg); + ++src_reg; + } + const IR::Value query{Query(v, handle, txq.mode, src_reg)}; + IR::Reg dest_reg{txq.dest_reg}; + for (int element = 0; element < 4; ++element) { + if (((txq.mask >> element) & 1) == 0) { + continue; + } + v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, static_cast<size_t>(element))}); + ++dest_reg; + } +} +} // Anonymous namespace + +void TranslatorVisitor::TXQ(u64 insn) { + union { + u64 raw; + BitField<36, 13, u64> cbuf_offset; + } const txq{insn}; + + Impl(*this, insn, static_cast<u32>(txq.cbuf_offset * 4)); +} + +void TranslatorVisitor::TXQ_b(u64 insn) { + Impl(*this, insn, std::nullopt); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp new file mode 100644 index 000000000..e1f4174cf --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp @@ -0,0 +1,30 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
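// [Editor's note] Hedged sketch, not part of this patch: TXQ and TMML above share the same
// writeback pattern - for every set bit in the 4-bit mask, one component of the query result
// is stored and the destination register advances by one. Plain integers stand in for IR
// registers here; WriteBackMasked is a hypothetical name.
#include <array>
#include <cstddef>

void WriteBackMasked(unsigned mask, const std::array<int, 4>& query, std::array<int, 256>& regs,
                     std::size_t dest_reg) {
    for (std::size_t element = 0; element < 4; ++element) {
        if (((mask >> element) & 1) == 0) {
            continue; // disabled components do not consume a destination register
        }
        regs[dest_reg] = query[element];
        ++dest_reg;
    }
}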
+ +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h" + +namespace Shader::Maxwell { + +IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value, VideoWidth width, + u32 selector, bool is_signed) { + switch (width) { + case VideoWidth::Byte: + case VideoWidth::Unknown: + return ir.BitFieldExtract(value, ir.Imm32(selector * 8), ir.Imm32(8), is_signed); + case VideoWidth::Short: + return ir.BitFieldExtract(value, ir.Imm32(selector * 16), ir.Imm32(16), is_signed); + case VideoWidth::Word: + return value; + default: + throw NotImplementedException("Unknown VideoWidth {}", width); + } +} + +VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate) { + // immediates must be 16-bit format. + return is_immediate ? VideoWidth::Short : width; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h new file mode 100644 index 000000000..40c0b907c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h @@ -0,0 +1,23 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +enum class VideoWidth : u64 { + Byte, + Unknown, + Short, + Word, +}; + +[[nodiscard]] IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value, + VideoWidth width, u32 selector, bool is_signed); + +[[nodiscard]] VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp new file mode 100644 index 000000000..78869601f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp @@ -0,0 +1,92 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
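// [Editor's note] Standalone sketch, not part of this patch: the arithmetic that
// ExtractVideoOperandValue above expresses through BitFieldExtract. A byte (8-bit) or short
// (16-bit) lane is selected and then zero- or sign-extended to 32 bits; the Word case returns
// the value unchanged. ExtractLane is a hypothetical name. Selectors are 0-3 for byte lanes
// and 0-1 for short lanes; larger values would shift out of range.
#include <cstdint>

std::uint32_t ExtractLane(std::uint32_t value, unsigned width_bits, unsigned selector,
                          bool is_signed) {
    if (width_bits >= 32) {
        return value; // VideoWidth::Word
    }
    const std::uint32_t lane = (value >> (selector * width_bits)) & ((1u << width_bits) - 1u);
    if (!is_signed) {
        return lane;
    }
    const std::uint32_t sign_bit = 1u << (width_bits - 1u);
    return (lane ^ sign_bit) - sign_bit; // two's complement sign extension
}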
+ +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h" + +namespace Shader::Maxwell { +namespace { +enum class VideoMinMaxOps : u64 { + MRG_16H, + MRG_16L, + MRG_8B0, + MRG_8B2, + ACC, + MIN, + MAX, +}; + +[[nodiscard]] IR::U32 ApplyVideoMinMaxOp(IR::IREmitter& ir, const IR::U32& lhs, const IR::U32& rhs, + VideoMinMaxOps op, bool is_signed) { + switch (op) { + case VideoMinMaxOps::MIN: + return ir.IMin(lhs, rhs, is_signed); + case VideoMinMaxOps::MAX: + return ir.IMax(lhs, rhs, is_signed); + default: + throw NotImplementedException("VMNMX op {}", op); + } +} +} // Anonymous namespace + +void TranslatorVisitor::VMNMX(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<20, 16, u64> src_b_imm; + BitField<28, 2, u64> src_b_selector; + BitField<29, 2, VideoWidth> src_b_width; + BitField<36, 2, u64> src_a_selector; + BitField<37, 2, VideoWidth> src_a_width; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> src_a_sign; + BitField<49, 1, u64> src_b_sign; + BitField<50, 1, u64> is_src_b_reg; + BitField<51, 3, VideoMinMaxOps> op; + BitField<54, 1, u64> dest_sign; + BitField<55, 1, u64> sat; + BitField<56, 1, u64> mx; + } const vmnmx{insn}; + + if (vmnmx.cc != 0) { + throw NotImplementedException("VMNMX CC"); + } + if (vmnmx.sat != 0) { + throw NotImplementedException("VMNMX SAT"); + } + // Selectors were shown to default to 2 in unit tests + if (vmnmx.src_a_selector != 2) { + throw NotImplementedException("VMNMX Selector {}", vmnmx.src_a_selector.Value()); + } + if (vmnmx.src_b_selector != 2) { + throw NotImplementedException("VMNMX Selector {}", vmnmx.src_b_selector.Value()); + } + if (vmnmx.src_a_width != VideoWidth::Word) { + throw NotImplementedException("VMNMX Source Width {}", vmnmx.src_a_width.Value()); + } + + const bool is_b_imm{vmnmx.is_src_b_reg == 0}; + const IR::U32 src_a{GetReg8(insn)}; + const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmnmx.src_b_imm)) : GetReg20(insn)}; + const IR::U32 src_c{GetReg39(insn)}; + + const VideoWidth a_width{vmnmx.src_a_width}; + const VideoWidth b_width{GetVideoSourceWidth(vmnmx.src_b_width, is_b_imm)}; + + const bool src_a_signed{vmnmx.src_a_sign != 0}; + const bool src_b_signed{vmnmx.src_b_sign != 0}; + const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, 0, src_a_signed)}; + const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, 0, src_b_signed)}; + + // First operation's sign is only dependent on operand b's sign + const bool op_1_signed{src_b_signed}; + + const IR::U32 lhs{vmnmx.mx != 0 ? ir.IMax(op_a, op_b, op_1_signed) + : ir.IMin(op_a, op_b, op_1_signed)}; + X(vmnmx.dest_reg, ApplyVideoMinMaxOp(ir, lhs, src_c, vmnmx.op, vmnmx.dest_sign != 0)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp new file mode 100644 index 000000000..cc2e6d6e6 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp @@ -0,0 +1,64 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
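// [Editor's note] Illustrative scalar sketch, not part of this patch: the VMNMX data path
// above is a first min/max between the two extracted video lanes followed by a second min/max
// against the third source register. Signed 64-bit values are used only so one function can
// stand in for both signed and unsigned 32-bit inputs; Vmnmx is a hypothetical name.
#include <algorithm>
#include <cstdint>

std::int64_t MinOrMax(std::int64_t a, std::int64_t b, bool take_max) {
    return take_max ? std::max(a, b) : std::min(a, b);
}

std::int64_t Vmnmx(std::int64_t lane_a, std::int64_t lane_b, std::int64_t src_c, bool mx,
                   bool outer_is_max) {
    const std::int64_t first = MinOrMax(lane_a, lane_b, mx); // IMin/IMax on the lanes
    return MinOrMax(first, src_c, outer_is_max);             // VideoMinMaxOps::MIN or ::MAX
}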
+ +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h" + +namespace Shader::Maxwell { +void TranslatorVisitor::VMAD(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<20, 16, u64> src_b_imm; + BitField<28, 2, u64> src_b_selector; + BitField<29, 2, VideoWidth> src_b_width; + BitField<36, 2, u64> src_a_selector; + BitField<37, 2, VideoWidth> src_a_width; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> src_a_sign; + BitField<49, 1, u64> src_b_sign; + BitField<50, 1, u64> is_src_b_reg; + BitField<51, 2, u64> scale; + BitField<53, 1, u64> src_c_neg; + BitField<54, 1, u64> src_a_neg; + BitField<55, 1, u64> sat; + } const vmad{insn}; + + if (vmad.cc != 0) { + throw NotImplementedException("VMAD CC"); + } + if (vmad.sat != 0) { + throw NotImplementedException("VMAD SAT"); + } + if (vmad.scale != 0) { + throw NotImplementedException("VMAD SCALE"); + } + if (vmad.src_a_neg != 0 && vmad.src_c_neg != 0) { + throw NotImplementedException("VMAD PO"); + } + if (vmad.src_a_neg != 0 || vmad.src_c_neg != 0) { + throw NotImplementedException("VMAD NEG"); + } + const bool is_b_imm{vmad.is_src_b_reg == 0}; + const IR::U32 src_a{GetReg8(insn)}; + const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmad.src_b_imm)) : GetReg20(insn)}; + const IR::U32 src_c{GetReg39(insn)}; + + const u32 a_selector{static_cast<u32>(vmad.src_a_selector)}; + // Immediate values can't have a selector + const u32 b_selector{is_b_imm ? 0U : static_cast<u32>(vmad.src_b_selector)}; + const VideoWidth a_width{vmad.src_a_width}; + const VideoWidth b_width{GetVideoSourceWidth(vmad.src_b_width, is_b_imm)}; + + const bool src_a_signed{vmad.src_a_sign != 0}; + const bool src_b_signed{vmad.src_b_sign != 0}; + const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)}; + const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)}; + + X(vmad.dest_reg, ir.IAdd(ir.IMul(op_a, op_b), src_c)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp new file mode 100644 index 000000000..1b66abc33 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp @@ -0,0 +1,92 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h" + +namespace Shader::Maxwell { +namespace { +enum class VsetpCompareOp : u64 { + False = 0, + LessThan, + Equal, + LessThanEqual, + GreaterThan = 16, + NotEqual, + GreaterThanEqual, + True, +}; + +CompareOp VsetpToShaderCompareOp(VsetpCompareOp op) { + switch (op) { + case VsetpCompareOp::False: + return CompareOp::False; + case VsetpCompareOp::LessThan: + return CompareOp::LessThan; + case VsetpCompareOp::Equal: + return CompareOp::Equal; + case VsetpCompareOp::LessThanEqual: + return CompareOp::LessThanEqual; + case VsetpCompareOp::GreaterThan: + return CompareOp::GreaterThan; + case VsetpCompareOp::NotEqual: + return CompareOp::NotEqual; + case VsetpCompareOp::GreaterThanEqual: + return CompareOp::GreaterThanEqual; + case VsetpCompareOp::True: + return CompareOp::True; + default: + throw NotImplementedException("Invalid compare op {}", op); + } +} +} // Anonymous namespace + +void TranslatorVisitor::VSETP(u64 insn) { + union { + u64 raw; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<20, 16, u64> src_b_imm; + BitField<28, 2, u64> src_b_selector; + BitField<29, 2, VideoWidth> src_b_width; + BitField<36, 2, u64> src_a_selector; + BitField<37, 2, VideoWidth> src_a_width; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<43, 5, VsetpCompareOp> compare_op; + BitField<45, 2, BooleanOp> bop; + BitField<48, 1, u64> src_a_sign; + BitField<49, 1, u64> src_b_sign; + BitField<50, 1, u64> is_src_b_reg; + } const vsetp{insn}; + + const bool is_b_imm{vsetp.is_src_b_reg == 0}; + const IR::U32 src_a{GetReg8(insn)}; + const IR::U32 src_b{is_b_imm ? 
ir.Imm32(static_cast<u32>(vsetp.src_b_imm)) : GetReg20(insn)}; + + const u32 a_selector{static_cast<u32>(vsetp.src_a_selector)}; + const u32 b_selector{static_cast<u32>(vsetp.src_b_selector)}; + const VideoWidth a_width{vsetp.src_a_width}; + const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)}; + + const bool src_a_signed{vsetp.src_a_sign != 0}; + const bool src_b_signed{vsetp.src_b_sign != 0}; + const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)}; + const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)}; + + // Compare operation's sign is only dependent on operand b's sign + const bool compare_signed{src_b_signed}; + const CompareOp compare_op{VsetpToShaderCompareOp(vsetp.compare_op)}; + const IR::U1 comparison{IntegerCompare(ir, op_a, op_b, compare_op, compare_signed)}; + const IR::U1 bop_pred{ir.GetPred(vsetp.bop_pred, vsetp.neg_bop_pred != 0)}; + const IR::U1 result_a{PredicateCombine(ir, comparison, bop_pred, vsetp.bop)}; + const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(comparison), bop_pred, vsetp.bop)}; + ir.SetPred(vsetp.dest_pred_a, result_a); + ir.SetPred(vsetp.dest_pred_b, result_b); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp new file mode 100644 index 000000000..7ce370f09 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp @@ -0,0 +1,54 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class VoteOp : u64 { + ALL, + ANY, + EQ, +}; + +[[nodiscard]] IR::U1 VoteOperation(IR::IREmitter& ir, const IR::U1& pred, VoteOp vote_op) { + switch (vote_op) { + case VoteOp::ALL: + return ir.VoteAll(pred); + case VoteOp::ANY: + return ir.VoteAny(pred); + case VoteOp::EQ: + return ir.VoteEqual(pred); + default: + throw NotImplementedException("Invalid VOTE op {}", vote_op); + } +} + +void Vote(TranslatorVisitor& v, u64 insn) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<39, 3, IR::Pred> pred_a; + BitField<42, 1, u64> neg_pred_a; + BitField<45, 3, IR::Pred> pred_b; + BitField<48, 2, VoteOp> vote_op; + } const vote{insn}; + + const IR::U1 vote_pred{v.ir.GetPred(vote.pred_a, vote.neg_pred_a != 0)}; + v.ir.SetPred(vote.pred_b, VoteOperation(v.ir, vote_pred, vote.vote_op)); + v.X(vote.dest_reg, v.ir.SubgroupBallot(vote_pred)); +} +} // Anonymous namespace + +void TranslatorVisitor::VOTE(u64 insn) { + Vote(*this, insn); +} + +void TranslatorVisitor::VOTE_vtg(u64) { + LOG_WARNING(Shader, "(STUBBED) called"); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp new file mode 100644 index 000000000..550fed55c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp @@ -0,0 +1,69 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
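// [Editor's note] Sketch only, not part of this patch: VSETP above derives two predicates from
// a single integer comparison - the boolean op combines the comparison with the bop predicate
// for the first result and the negated comparison for the second. The AND/OR/XOR ordering
// below is assumed purely for illustration; the real encoding is BooleanOp in common_funcs.h.
#include <utility>

enum class Bop { AND, OR, XOR };

bool Combine(bool a, bool b, Bop op) {
    switch (op) {
    case Bop::AND:
        return a && b;
    case Bop::OR:
        return a || b;
    default:
        return a != b;
    }
}

std::pair<bool, bool> VsetpResults(bool comparison, bool bop_pred, Bop op) {
    return {Combine(comparison, bop_pred, op), Combine(!comparison, bop_pred, op)};
}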
+ +#include <optional> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class ShuffleMode : u64 { + IDX, + UP, + DOWN, + BFLY, +}; + +[[nodiscard]] IR::U32 ShuffleOperation(IR::IREmitter& ir, const IR::U32& value, + const IR::U32& index, const IR::U32& mask, + ShuffleMode shfl_op) { + const IR::U32 clamp{ir.BitFieldExtract(mask, ir.Imm32(0), ir.Imm32(5))}; + const IR::U32 seg_mask{ir.BitFieldExtract(mask, ir.Imm32(8), ir.Imm32(5))}; + switch (shfl_op) { + case ShuffleMode::IDX: + return ir.ShuffleIndex(value, index, clamp, seg_mask); + case ShuffleMode::UP: + return ir.ShuffleUp(value, index, clamp, seg_mask); + case ShuffleMode::DOWN: + return ir.ShuffleDown(value, index, clamp, seg_mask); + case ShuffleMode::BFLY: + return ir.ShuffleButterfly(value, index, clamp, seg_mask); + default: + throw NotImplementedException("Invalid SHFL op {}", shfl_op); + } +} + +void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32& mask) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<30, 2, ShuffleMode> mode; + BitField<48, 3, IR::Pred> pred; + } const shfl{insn}; + + const IR::U32 result{ShuffleOperation(v.ir, v.X(shfl.src_reg), index, mask, shfl.mode)}; + v.ir.SetPred(shfl.pred, v.ir.GetInBoundsFromOp(result)); + v.X(shfl.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::SHFL(u64 insn) { + union { + u64 insn; + BitField<20, 5, u64> src_a_imm; + BitField<28, 1, u64> src_a_flag; + BitField<29, 1, u64> src_b_flag; + BitField<34, 13, u64> src_b_imm; + } const flags{insn}; + const IR::U32 src_a{flags.src_a_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_a_imm)) + : GetReg20(insn)}; + const IR::U32 src_b{flags.src_b_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_b_imm)) + : GetReg39(insn)}; + Shuffle(*this, insn, src_a, src_b); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp new file mode 100644 index 000000000..8e3c4c5d5 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp @@ -0,0 +1,52 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
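// [Editor's note] Standalone sketch, not part of this patch: how the SHFL mask operand above is
// unpacked before reaching the shuffle IR ops - bits [4:0] carry the clamp value and bits
// [12:8] the segmentation mask, exactly as the two BitFieldExtract calls encode it.
// UnpackShflMask is a hypothetical name.
#include <cstdint>

struct ShflMask {
    std::uint32_t clamp;
    std::uint32_t segmentation;
};

constexpr ShflMask UnpackShflMask(std::uint32_t mask) {
    return {mask & 0x1fu, (mask >> 8) & 0x1fu};
}

static_assert(UnpackShflMask(0x1f1f).clamp == 0x1f);
static_assert(UnpackShflMask(0x1f1f).segmentation == 0x1f);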
+ +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/maxwell/decode.h" +#include "shader_recompiler/frontend/maxwell/location.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" +#include "shader_recompiler/frontend/maxwell/translate/translate.h" + +namespace Shader::Maxwell { + +template <auto method> +static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) { + using MethodType = decltype(method); + if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, Location, u64>) { + (visitor.*method)(pc, insn); + } else if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, u64>) { + (visitor.*method)(insn); + } else { + (visitor.*method)(); + } +} + +void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end) { + if (location_begin == location_end) { + return; + } + TranslatorVisitor visitor{env, *block}; + for (Location pc = location_begin; pc != location_end; ++pc) { + const u64 insn{env.ReadInstruction(pc.Offset())}; + try { + const Opcode opcode{Decode(insn)}; + switch (opcode) { +#define INST(name, cute, mask) \ + case Opcode::name: \ + Invoke<&TranslatorVisitor::name>(visitor, pc, insn); \ + break; +#include "shader_recompiler/frontend/maxwell/maxwell.inc" +#undef OPCODE + default: + throw LogicError("Invalid opcode {}", opcode); + } + } catch (Exception& exception) { + exception.Prepend(fmt::format("Translate {}: ", Decode(insn))); + throw; + } + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.h b/src/shader_recompiler/frontend/maxwell/translate/translate.h new file mode 100644 index 000000000..a3edd2e46 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.h @@ -0,0 +1,14 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" + +namespace Shader::Maxwell { + +void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp new file mode 100644 index 000000000..c067d459c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -0,0 +1,223 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
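// [Editor's note] Minimal sketch, not part of this patch, of the X-macro dispatch that
// Translate() above builds by defining INST and including maxwell.inc: the instruction table
// expands to one switch case per opcode. The two-entry table below is entirely hypothetical and
// simplified to a single macro argument (the real table also carries a name string and a mask).
#define INSTRUCTION_TABLE(X) \
    X(MOV)                   \
    X(IADD)

enum class Opcode {
#define INST(name) name,
    INSTRUCTION_TABLE(INST)
#undef INST
};

const char* OpcodeName(Opcode op) {
    switch (op) {
#define INST(name)     \
    case Opcode::name: \
        return #name;
        INSTRUCTION_TABLE(INST)
#undef INST
    }
    return "invalid";
}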
+ +#include <algorithm> +#include <memory> +#include <vector> + +#include "common/settings.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/post_order.h" +#include "shader_recompiler/frontend/maxwell/structured_control_flow.h" +#include "shader_recompiler/frontend/maxwell/translate/translate.h" +#include "shader_recompiler/frontend/maxwell/translate_program.h" +#include "shader_recompiler/host_translate_info.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Maxwell { +namespace { +IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { + size_t num_syntax_blocks{}; + for (const auto& node : syntax_list) { + if (node.type == IR::AbstractSyntaxNode::Type::Block) { + ++num_syntax_blocks; + } + } + IR::BlockList blocks; + blocks.reserve(num_syntax_blocks); + for (const auto& node : syntax_list) { + if (node.type == IR::AbstractSyntaxNode::Type::Block) { + blocks.push_back(node.data.block); + } + } + return blocks; +} + +void RemoveUnreachableBlocks(IR::Program& program) { + // Some blocks might be unreachable if a function call exists unconditionally + // If this happens the number of blocks and post order blocks will mismatch + if (program.blocks.size() == program.post_order_blocks.size()) { + return; + } + const auto begin{program.blocks.begin() + 1}; + const auto end{program.blocks.end()}; + const auto pred{[](IR::Block* block) { return block->ImmPredecessors().empty(); }}; + program.blocks.erase(std::remove_if(begin, end, pred), end); +} + +void CollectInterpolationInfo(Environment& env, IR::Program& program) { + if (program.stage != Stage::Fragment) { + return; + } + const ProgramHeader& sph{env.SPH()}; + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + std::optional<PixelImap> imap; + for (const PixelImap value : sph.ps.GenericInputMap(static_cast<u32>(index))) { + if (value == PixelImap::Unused) { + continue; + } + if (imap && imap != value) { + throw NotImplementedException("Per component interpolation"); + } + imap = value; + } + if (!imap) { + continue; + } + program.info.interpolation[index] = [&] { + switch (*imap) { + case PixelImap::Unused: + case PixelImap::Perspective: + return Interpolation::Smooth; + case PixelImap::Constant: + return Interpolation::Flat; + case PixelImap::ScreenLinear: + return Interpolation::NoPerspective; + } + throw NotImplementedException("Unknown interpolation {}", *imap); + }(); + } +} + +void AddNVNStorageBuffers(IR::Program& program) { + if (!program.info.uses_global_memory) { + return; + } + const u32 driver_cbuf{0}; + const u32 descriptor_size{0x10}; + const u32 num_buffers{16}; + const u32 base{[&] { + switch (program.stage) { + case Stage::VertexA: + case Stage::VertexB: + return 0x110u; + case Stage::TessellationControl: + return 0x210u; + case Stage::TessellationEval: + return 0x310u; + case Stage::Geometry: + return 0x410u; + case Stage::Fragment: + return 0x510u; + case Stage::Compute: + return 0x310u; + } + throw InvalidArgument("Invalid stage {}", program.stage); + }()}; + auto& descs{program.info.storage_buffers_descriptors}; + for (u32 index = 0; index < num_buffers; ++index) { + if (!program.info.nvn_buffer_used[index]) { + continue; + } + const u32 offset{base + index * descriptor_size}; + const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)}; + if (it != descs.end()) { + it->is_written |= program.info.stores_global_memory; + continue; + } + descs.push_back({ + 
.cbuf_index = driver_cbuf, + .cbuf_offset = offset, + .count = 1, + .is_written = program.info.stores_global_memory, + }); + } +} +} // Anonymous namespace + +IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, + Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) { + IR::Program program; + program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); + program.blocks = GenerateBlocks(program.syntax_list); + program.post_order_blocks = PostOrder(program.syntax_list.front()); + program.stage = env.ShaderStage(); + program.local_memory_size = env.LocalMemorySize(); + switch (program.stage) { + case Stage::TessellationControl: { + const ProgramHeader& sph{env.SPH()}; + program.invocations = sph.common2.threads_per_input_primitive; + break; + } + case Stage::Geometry: { + const ProgramHeader& sph{env.SPH()}; + program.output_topology = sph.common3.output_topology; + program.output_vertices = sph.common4.max_output_vertices; + program.invocations = sph.common2.threads_per_input_primitive; + program.is_geometry_passthrough = sph.common0.geometry_passthrough != 0; + if (program.is_geometry_passthrough) { + const auto& mask{env.GpPassthroughMask()}; + for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) { + program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0; + } + } + break; + } + case Stage::Compute: + program.workgroup_size = env.WorkgroupSize(); + program.shared_memory_size = env.SharedMemorySize(); + break; + default: + break; + } + RemoveUnreachableBlocks(program); + + // Replace instructions before the SSA rewrite + if (!host_info.support_float16) { + Optimization::LowerFp16ToFp32(program); + } + if (!host_info.support_int64) { + Optimization::LowerInt64ToInt32(program); + } + Optimization::SsaRewritePass(program); + + Optimization::GlobalMemoryToStorageBufferPass(program); + Optimization::TexturePass(env, program); + + Optimization::ConstantPropagationPass(program); + Optimization::DeadCodeEliminationPass(program); + if (Settings::values.renderer_debug) { + Optimization::VerificationPass(program); + } + Optimization::CollectShaderInfoPass(env, program); + CollectInterpolationInfo(env, program); + AddNVNStorageBuffers(program); + return program; +} + +IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, + Environment& env_vertex_b) { + IR::Program result{}; + Optimization::VertexATransformPass(vertex_a); + Optimization::VertexBTransformPass(vertex_b); + for (const auto& term : vertex_a.syntax_list) { + if (term.type != IR::AbstractSyntaxNode::Type::Return) { + result.syntax_list.push_back(term); + } + } + result.syntax_list.insert(result.syntax_list.end(), vertex_b.syntax_list.begin(), + vertex_b.syntax_list.end()); + result.blocks = GenerateBlocks(result.syntax_list); + result.post_order_blocks = vertex_b.post_order_blocks; + for (const auto& block : vertex_a.post_order_blocks) { + result.post_order_blocks.push_back(block); + } + result.stage = Stage::VertexB; + result.info = vertex_a.info; + result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); + result.info.loads.mask |= vertex_b.info.loads.mask; + result.info.stores.mask |= vertex_b.info.stores.mask; + + Optimization::JoinTextureInfo(result.info, vertex_b.info); + Optimization::JoinStorageInfo(result.info, vertex_b.info); + Optimization::DeadCodeEliminationPass(result); + if (Settings::values.renderer_debug) { + Optimization::VerificationPass(result); + } + 
Optimization::CollectShaderInfoPass(env_vertex_b, result); + return result; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.h b/src/shader_recompiler/frontend/maxwell/translate_program.h new file mode 100644 index 000000000..a84814811 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate_program.h @@ -0,0 +1,23 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/host_translate_info.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::Maxwell { + +[[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, + ObjectPool<IR::Block>& block_pool, Environment& env, + Flow::CFG& cfg, const HostTranslateInfo& host_info); + +[[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, + Environment& env_vertex_b); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h new file mode 100644 index 000000000..94a584219 --- /dev/null +++ b/src/shader_recompiler/host_translate_info.h @@ -0,0 +1,18 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +namespace Shader { + +// Try to keep entries here to a minimum +// They can accidentally change the cached information in a shader + +/// Misc information about the host +struct HostTranslateInfo { + bool support_float16{}; ///< True when the device supports 16-bit floats + bool support_int64{}; ///< True when the device supports 64-bit integers +}; + +} // namespace Shader diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp new file mode 100644 index 000000000..5ead930f1 --- /dev/null +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -0,0 +1,928 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
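// [Editor's note] Illustrative arithmetic only, not part of this patch: AddNVNStorageBuffers
// above (and CheckCBufNVN in the pass that follows) treat constant buffer 0 as holding 16
// storage-buffer descriptors of 0x10 bytes each, starting at a per-stage base such as 0x110 for
// vertex shaders or 0x510 for fragment shaders. DescriptorOffset is a hypothetical name.
#include <cstdint>

constexpr std::uint32_t DescriptorOffset(std::uint32_t stage_base, std::uint32_t index) {
    return stage_base + index * 0x10u;
}

static_assert(DescriptorOffset(0x110u, 0) == 0x110u);  // first vertex descriptor
static_assert(DescriptorOffset(0x510u, 15) == 0x600u); // last fragment descriptor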
+ +#include "common/alignment.h" +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/ir_opt/passes.h" +#include "shader_recompiler/shader_info.h" + +namespace Shader::Optimization { +namespace { +void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { + if (count != 1) { + throw NotImplementedException("Constant buffer descriptor indexing"); + } + if ((info.constant_buffer_mask & (1U << index)) != 0) { + return; + } + info.constant_buffer_mask |= 1U << index; + + auto& cbufs{info.constant_buffer_descriptors}; + cbufs.insert(std::ranges::lower_bound(cbufs, index, {}, &ConstantBufferDescriptor::index), + ConstantBufferDescriptor{ + .index = index, + .count = 1, + }); +} + +void GetPatch(Info& info, IR::Patch patch) { + if (!IR::IsGeneric(patch)) { + throw NotImplementedException("Reading non-generic patch {}", patch); + } + info.uses_patches.at(IR::GenericPatchIndex(patch)) = true; +} + +void SetPatch(Info& info, IR::Patch patch) { + if (IR::IsGeneric(patch)) { + info.uses_patches.at(IR::GenericPatchIndex(patch)) = true; + return; + } + switch (patch) { + case IR::Patch::TessellationLodLeft: + case IR::Patch::TessellationLodTop: + case IR::Patch::TessellationLodRight: + case IR::Patch::TessellationLodBottom: + info.stores_tess_level_outer = true; + break; + case IR::Patch::TessellationLodInteriorU: + case IR::Patch::TessellationLodInteriorV: + info.stores_tess_level_inner = true; + break; + default: + throw NotImplementedException("Set patch {}", patch); + } +} + +void CheckCBufNVN(Info& info, IR::Inst& inst) { + const IR::Value cbuf_index{inst.Arg(0)}; + if (!cbuf_index.IsImmediate()) { + info.nvn_buffer_used.set(); + return; + } + const u32 index{cbuf_index.U32()}; + if (index != 0) { + return; + } + const IR::Value cbuf_offset{inst.Arg(1)}; + if (!cbuf_offset.IsImmediate()) { + info.nvn_buffer_used.set(); + return; + } + const u32 offset{cbuf_offset.U32()}; + const u32 descriptor_size{0x10}; + const u32 upper_limit{info.nvn_buffer_base + descriptor_size * 16}; + if (offset >= info.nvn_buffer_base && offset < upper_limit) { + const std::size_t nvn_index{(offset - info.nvn_buffer_base) / descriptor_size}; + info.nvn_buffer_used.set(nvn_index, true); + } +} + +void VisitUsages(Info& info, IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::CompositeConstructF16x2: + case IR::Opcode::CompositeConstructF16x3: + case IR::Opcode::CompositeConstructF16x4: + case IR::Opcode::CompositeExtractF16x2: + case IR::Opcode::CompositeExtractF16x3: + case IR::Opcode::CompositeExtractF16x4: + case IR::Opcode::CompositeInsertF16x2: + case IR::Opcode::CompositeInsertF16x3: + case IR::Opcode::CompositeInsertF16x4: + case IR::Opcode::SelectF16: + case IR::Opcode::BitCastU16F16: + case IR::Opcode::BitCastF16U16: + case IR::Opcode::PackFloat2x16: + case IR::Opcode::UnpackFloat2x16: + case IR::Opcode::ConvertS16F16: + case IR::Opcode::ConvertS32F16: + case IR::Opcode::ConvertS64F16: + case IR::Opcode::ConvertU16F16: + case IR::Opcode::ConvertU32F16: + case IR::Opcode::ConvertU64F16: + case IR::Opcode::ConvertF16S8: + case IR::Opcode::ConvertF16S16: + case IR::Opcode::ConvertF16S32: + case IR::Opcode::ConvertF16S64: + case IR::Opcode::ConvertF16U8: + case IR::Opcode::ConvertF16U16: + case IR::Opcode::ConvertF16U32: + case IR::Opcode::ConvertF16U64: + case IR::Opcode::FPAbs16: + case IR::Opcode::FPAdd16: + case 
IR::Opcode::FPCeil16: + case IR::Opcode::FPFloor16: + case IR::Opcode::FPFma16: + case IR::Opcode::FPMul16: + case IR::Opcode::FPNeg16: + case IR::Opcode::FPRoundEven16: + case IR::Opcode::FPSaturate16: + case IR::Opcode::FPClamp16: + case IR::Opcode::FPTrunc16: + case IR::Opcode::FPOrdEqual16: + case IR::Opcode::FPUnordEqual16: + case IR::Opcode::FPOrdNotEqual16: + case IR::Opcode::FPUnordNotEqual16: + case IR::Opcode::FPOrdLessThan16: + case IR::Opcode::FPUnordLessThan16: + case IR::Opcode::FPOrdGreaterThan16: + case IR::Opcode::FPUnordGreaterThan16: + case IR::Opcode::FPOrdLessThanEqual16: + case IR::Opcode::FPUnordLessThanEqual16: + case IR::Opcode::FPOrdGreaterThanEqual16: + case IR::Opcode::FPUnordGreaterThanEqual16: + case IR::Opcode::FPIsNan16: + case IR::Opcode::GlobalAtomicAddF16x2: + case IR::Opcode::GlobalAtomicMinF16x2: + case IR::Opcode::GlobalAtomicMaxF16x2: + case IR::Opcode::StorageAtomicAddF16x2: + case IR::Opcode::StorageAtomicMinF16x2: + case IR::Opcode::StorageAtomicMaxF16x2: + info.uses_fp16 = true; + break; + case IR::Opcode::CompositeConstructF64x2: + case IR::Opcode::CompositeConstructF64x3: + case IR::Opcode::CompositeConstructF64x4: + case IR::Opcode::CompositeExtractF64x2: + case IR::Opcode::CompositeExtractF64x3: + case IR::Opcode::CompositeExtractF64x4: + case IR::Opcode::CompositeInsertF64x2: + case IR::Opcode::CompositeInsertF64x3: + case IR::Opcode::CompositeInsertF64x4: + case IR::Opcode::SelectF64: + case IR::Opcode::BitCastU64F64: + case IR::Opcode::BitCastF64U64: + case IR::Opcode::PackDouble2x32: + case IR::Opcode::UnpackDouble2x32: + case IR::Opcode::FPAbs64: + case IR::Opcode::FPAdd64: + case IR::Opcode::FPCeil64: + case IR::Opcode::FPFloor64: + case IR::Opcode::FPFma64: + case IR::Opcode::FPMax64: + case IR::Opcode::FPMin64: + case IR::Opcode::FPMul64: + case IR::Opcode::FPNeg64: + case IR::Opcode::FPRecip64: + case IR::Opcode::FPRecipSqrt64: + case IR::Opcode::FPRoundEven64: + case IR::Opcode::FPSaturate64: + case IR::Opcode::FPClamp64: + case IR::Opcode::FPTrunc64: + case IR::Opcode::FPOrdEqual64: + case IR::Opcode::FPUnordEqual64: + case IR::Opcode::FPOrdNotEqual64: + case IR::Opcode::FPUnordNotEqual64: + case IR::Opcode::FPOrdLessThan64: + case IR::Opcode::FPUnordLessThan64: + case IR::Opcode::FPOrdGreaterThan64: + case IR::Opcode::FPUnordGreaterThan64: + case IR::Opcode::FPOrdLessThanEqual64: + case IR::Opcode::FPUnordLessThanEqual64: + case IR::Opcode::FPOrdGreaterThanEqual64: + case IR::Opcode::FPUnordGreaterThanEqual64: + case IR::Opcode::FPIsNan64: + case IR::Opcode::ConvertS16F64: + case IR::Opcode::ConvertS32F64: + case IR::Opcode::ConvertS64F64: + case IR::Opcode::ConvertU16F64: + case IR::Opcode::ConvertU32F64: + case IR::Opcode::ConvertU64F64: + case IR::Opcode::ConvertF32F64: + case IR::Opcode::ConvertF64F32: + case IR::Opcode::ConvertF64S8: + case IR::Opcode::ConvertF64S16: + case IR::Opcode::ConvertF64S32: + case IR::Opcode::ConvertF64S64: + case IR::Opcode::ConvertF64U8: + case IR::Opcode::ConvertF64U16: + case IR::Opcode::ConvertF64U32: + case IR::Opcode::ConvertF64U64: + info.uses_fp64 = true; + break; + default: + break; + } + switch (inst.GetOpcode()) { + case IR::Opcode::GetCbufU8: + case IR::Opcode::GetCbufS8: + case IR::Opcode::UndefU8: + case IR::Opcode::LoadGlobalU8: + case IR::Opcode::LoadGlobalS8: + case IR::Opcode::WriteGlobalU8: + case IR::Opcode::WriteGlobalS8: + case IR::Opcode::LoadStorageU8: + case IR::Opcode::LoadStorageS8: + case IR::Opcode::WriteStorageU8: + case IR::Opcode::WriteStorageS8: + case 
IR::Opcode::LoadSharedU8: + case IR::Opcode::LoadSharedS8: + case IR::Opcode::WriteSharedU8: + case IR::Opcode::SelectU8: + case IR::Opcode::ConvertF16S8: + case IR::Opcode::ConvertF16U8: + case IR::Opcode::ConvertF32S8: + case IR::Opcode::ConvertF32U8: + case IR::Opcode::ConvertF64S8: + case IR::Opcode::ConvertF64U8: + info.uses_int8 = true; + break; + default: + break; + } + switch (inst.GetOpcode()) { + case IR::Opcode::GetCbufU16: + case IR::Opcode::GetCbufS16: + case IR::Opcode::UndefU16: + case IR::Opcode::LoadGlobalU16: + case IR::Opcode::LoadGlobalS16: + case IR::Opcode::WriteGlobalU16: + case IR::Opcode::WriteGlobalS16: + case IR::Opcode::LoadStorageU16: + case IR::Opcode::LoadStorageS16: + case IR::Opcode::WriteStorageU16: + case IR::Opcode::WriteStorageS16: + case IR::Opcode::LoadSharedU16: + case IR::Opcode::LoadSharedS16: + case IR::Opcode::WriteSharedU16: + case IR::Opcode::SelectU16: + case IR::Opcode::BitCastU16F16: + case IR::Opcode::BitCastF16U16: + case IR::Opcode::ConvertS16F16: + case IR::Opcode::ConvertS16F32: + case IR::Opcode::ConvertS16F64: + case IR::Opcode::ConvertU16F16: + case IR::Opcode::ConvertU16F32: + case IR::Opcode::ConvertU16F64: + case IR::Opcode::ConvertF16S16: + case IR::Opcode::ConvertF16U16: + case IR::Opcode::ConvertF32S16: + case IR::Opcode::ConvertF32U16: + case IR::Opcode::ConvertF64S16: + case IR::Opcode::ConvertF64U16: + info.uses_int16 = true; + break; + default: + break; + } + switch (inst.GetOpcode()) { + case IR::Opcode::UndefU64: + case IR::Opcode::LoadGlobalU8: + case IR::Opcode::LoadGlobalS8: + case IR::Opcode::LoadGlobalU16: + case IR::Opcode::LoadGlobalS16: + case IR::Opcode::LoadGlobal32: + case IR::Opcode::LoadGlobal64: + case IR::Opcode::LoadGlobal128: + case IR::Opcode::WriteGlobalU8: + case IR::Opcode::WriteGlobalS8: + case IR::Opcode::WriteGlobalU16: + case IR::Opcode::WriteGlobalS16: + case IR::Opcode::WriteGlobal32: + case IR::Opcode::WriteGlobal64: + case IR::Opcode::WriteGlobal128: + case IR::Opcode::SelectU64: + case IR::Opcode::BitCastU64F64: + case IR::Opcode::BitCastF64U64: + case IR::Opcode::PackUint2x32: + case IR::Opcode::UnpackUint2x32: + case IR::Opcode::IAdd64: + case IR::Opcode::ISub64: + case IR::Opcode::INeg64: + case IR::Opcode::ShiftLeftLogical64: + case IR::Opcode::ShiftRightLogical64: + case IR::Opcode::ShiftRightArithmetic64: + case IR::Opcode::ConvertS64F16: + case IR::Opcode::ConvertS64F32: + case IR::Opcode::ConvertS64F64: + case IR::Opcode::ConvertU64F16: + case IR::Opcode::ConvertU64F32: + case IR::Opcode::ConvertU64F64: + case IR::Opcode::ConvertU64U32: + case IR::Opcode::ConvertU32U64: + case IR::Opcode::ConvertF16U64: + case IR::Opcode::ConvertF32U64: + case IR::Opcode::ConvertF64U64: + case IR::Opcode::SharedAtomicExchange64: + case IR::Opcode::GlobalAtomicIAdd64: + case IR::Opcode::GlobalAtomicSMin64: + case IR::Opcode::GlobalAtomicUMin64: + case IR::Opcode::GlobalAtomicSMax64: + case IR::Opcode::GlobalAtomicUMax64: + case IR::Opcode::GlobalAtomicAnd64: + case IR::Opcode::GlobalAtomicOr64: + case IR::Opcode::GlobalAtomicXor64: + case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::StorageAtomicIAdd64: + case IR::Opcode::StorageAtomicSMin64: + case IR::Opcode::StorageAtomicUMin64: + case IR::Opcode::StorageAtomicSMax64: + case IR::Opcode::StorageAtomicUMax64: + case IR::Opcode::StorageAtomicAnd64: + case IR::Opcode::StorageAtomicOr64: + case IR::Opcode::StorageAtomicXor64: + case IR::Opcode::StorageAtomicExchange64: + info.uses_int64 = true; + break; + default: + break; + } + switch 
(inst.GetOpcode()) { + case IR::Opcode::WriteGlobalU8: + case IR::Opcode::WriteGlobalS8: + case IR::Opcode::WriteGlobalU16: + case IR::Opcode::WriteGlobalS16: + case IR::Opcode::WriteGlobal32: + case IR::Opcode::WriteGlobal64: + case IR::Opcode::WriteGlobal128: + case IR::Opcode::GlobalAtomicIAdd32: + case IR::Opcode::GlobalAtomicSMin32: + case IR::Opcode::GlobalAtomicUMin32: + case IR::Opcode::GlobalAtomicSMax32: + case IR::Opcode::GlobalAtomicUMax32: + case IR::Opcode::GlobalAtomicInc32: + case IR::Opcode::GlobalAtomicDec32: + case IR::Opcode::GlobalAtomicAnd32: + case IR::Opcode::GlobalAtomicOr32: + case IR::Opcode::GlobalAtomicXor32: + case IR::Opcode::GlobalAtomicExchange32: + case IR::Opcode::GlobalAtomicIAdd64: + case IR::Opcode::GlobalAtomicSMin64: + case IR::Opcode::GlobalAtomicUMin64: + case IR::Opcode::GlobalAtomicSMax64: + case IR::Opcode::GlobalAtomicUMax64: + case IR::Opcode::GlobalAtomicAnd64: + case IR::Opcode::GlobalAtomicOr64: + case IR::Opcode::GlobalAtomicXor64: + case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::GlobalAtomicAddF32: + case IR::Opcode::GlobalAtomicAddF16x2: + case IR::Opcode::GlobalAtomicAddF32x2: + case IR::Opcode::GlobalAtomicMinF16x2: + case IR::Opcode::GlobalAtomicMinF32x2: + case IR::Opcode::GlobalAtomicMaxF16x2: + case IR::Opcode::GlobalAtomicMaxF32x2: + info.stores_global_memory = true; + [[fallthrough]]; + case IR::Opcode::LoadGlobalU8: + case IR::Opcode::LoadGlobalS8: + case IR::Opcode::LoadGlobalU16: + case IR::Opcode::LoadGlobalS16: + case IR::Opcode::LoadGlobal32: + case IR::Opcode::LoadGlobal64: + case IR::Opcode::LoadGlobal128: + info.uses_int64 = true; + info.uses_global_memory = true; + info.used_constant_buffer_types |= IR::Type::U32 | IR::Type::U32x2; + info.used_storage_buffer_types |= IR::Type::U32 | IR::Type::U32x2 | IR::Type::U32x4; + break; + default: + break; + } + switch (inst.GetOpcode()) { + case IR::Opcode::DemoteToHelperInvocation: + info.uses_demote_to_helper_invocation = true; + break; + case IR::Opcode::GetAttribute: + info.loads.mask[static_cast<size_t>(inst.Arg(0).Attribute())] = true; + break; + case IR::Opcode::SetAttribute: + info.stores.mask[static_cast<size_t>(inst.Arg(0).Attribute())] = true; + break; + case IR::Opcode::GetPatch: + GetPatch(info, inst.Arg(0).Patch()); + break; + case IR::Opcode::SetPatch: + SetPatch(info, inst.Arg(0).Patch()); + break; + case IR::Opcode::GetAttributeIndexed: + info.loads_indexed_attributes = true; + break; + case IR::Opcode::SetAttributeIndexed: + info.stores_indexed_attributes = true; + break; + case IR::Opcode::SetFragColor: + info.stores_frag_color[inst.Arg(0).U32()] = true; + break; + case IR::Opcode::SetSampleMask: + info.stores_sample_mask = true; + break; + case IR::Opcode::SetFragDepth: + info.stores_frag_depth = true; + break; + case IR::Opcode::WorkgroupId: + info.uses_workgroup_id = true; + break; + case IR::Opcode::LocalInvocationId: + info.uses_local_invocation_id = true; + break; + case IR::Opcode::InvocationId: + info.uses_invocation_id = true; + break; + case IR::Opcode::SampleId: + info.uses_sample_id = true; + break; + case IR::Opcode::IsHelperInvocation: + info.uses_is_helper_invocation = true; + break; + case IR::Opcode::LaneId: + info.uses_subgroup_invocation_id = true; + break; + case IR::Opcode::ShuffleIndex: + case IR::Opcode::ShuffleUp: + case IR::Opcode::ShuffleDown: + case IR::Opcode::ShuffleButterfly: + info.uses_subgroup_shuffles = true; + break; + case IR::Opcode::GetCbufU8: + case IR::Opcode::GetCbufS8: + case IR::Opcode::GetCbufU16: + case 
IR::Opcode::GetCbufS16: + case IR::Opcode::GetCbufU32: + case IR::Opcode::GetCbufF32: + case IR::Opcode::GetCbufU32x2: { + const IR::Value index{inst.Arg(0)}; + const IR::Value offset{inst.Arg(1)}; + if (!index.IsImmediate()) { + throw NotImplementedException("Constant buffer with non-immediate index"); + } + AddConstantBufferDescriptor(info, index.U32(), 1); + u32 element_size{}; + switch (inst.GetOpcode()) { + case IR::Opcode::GetCbufU8: + case IR::Opcode::GetCbufS8: + info.used_constant_buffer_types |= IR::Type::U8; + element_size = 1; + break; + case IR::Opcode::GetCbufU16: + case IR::Opcode::GetCbufS16: + info.used_constant_buffer_types |= IR::Type::U16; + element_size = 2; + break; + case IR::Opcode::GetCbufU32: + info.used_constant_buffer_types |= IR::Type::U32; + element_size = 4; + break; + case IR::Opcode::GetCbufF32: + info.used_constant_buffer_types |= IR::Type::F32; + element_size = 4; + break; + case IR::Opcode::GetCbufU32x2: + info.used_constant_buffer_types |= IR::Type::U32x2; + element_size = 8; + break; + default: + break; + } + u32& size{info.constant_buffer_used_sizes[index.U32()]}; + if (offset.IsImmediate()) { + size = Common::AlignUp(std::max(size, offset.U32() + element_size), 16u); + } else { + size = 0x10'000; + } + break; + } + case IR::Opcode::BindlessImageSampleImplicitLod: + case IR::Opcode::BindlessImageSampleExplicitLod: + case IR::Opcode::BindlessImageSampleDrefImplicitLod: + case IR::Opcode::BindlessImageSampleDrefExplicitLod: + case IR::Opcode::BindlessImageGather: + case IR::Opcode::BindlessImageGatherDref: + case IR::Opcode::BindlessImageFetch: + case IR::Opcode::BindlessImageQueryDimensions: + case IR::Opcode::BindlessImageQueryLod: + case IR::Opcode::BindlessImageGradient: + case IR::Opcode::BoundImageSampleImplicitLod: + case IR::Opcode::BoundImageSampleExplicitLod: + case IR::Opcode::BoundImageSampleDrefImplicitLod: + case IR::Opcode::BoundImageSampleDrefExplicitLod: + case IR::Opcode::BoundImageGather: + case IR::Opcode::BoundImageGatherDref: + case IR::Opcode::BoundImageFetch: + case IR::Opcode::BoundImageQueryDimensions: + case IR::Opcode::BoundImageQueryLod: + case IR::Opcode::BoundImageGradient: + case IR::Opcode::ImageGather: + case IR::Opcode::ImageGatherDref: + case IR::Opcode::ImageFetch: + case IR::Opcode::ImageQueryDimensions: + case IR::Opcode::ImageGradient: { + const TextureType type{inst.Flags<IR::TextureInstInfo>().type}; + info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D; + info.uses_sparse_residency |= + inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; + break; + } + case IR::Opcode::ImageSampleImplicitLod: + case IR::Opcode::ImageSampleExplicitLod: + case IR::Opcode::ImageSampleDrefImplicitLod: + case IR::Opcode::ImageSampleDrefExplicitLod: + case IR::Opcode::ImageQueryLod: { + const auto flags{inst.Flags<IR::TextureInstInfo>()}; + const TextureType type{flags.type}; + info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D; + info.uses_shadow_lod |= flags.is_depth != 0; + info.uses_sparse_residency |= + inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; + break; + } + case IR::Opcode::ImageRead: { + const auto flags{inst.Flags<IR::TextureInstInfo>()}; + info.uses_typeless_image_reads |= flags.image_format == ImageFormat::Typeless; + info.uses_sparse_residency |= + inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; + break; + } + case IR::Opcode::ImageWrite: { + const auto 
flags{inst.Flags<IR::TextureInstInfo>()}; + info.uses_typeless_image_writes |= flags.image_format == ImageFormat::Typeless; + info.uses_image_buffers |= flags.type == TextureType::Buffer; + break; + } + case IR::Opcode::SubgroupEqMask: + case IR::Opcode::SubgroupLtMask: + case IR::Opcode::SubgroupLeMask: + case IR::Opcode::SubgroupGtMask: + case IR::Opcode::SubgroupGeMask: + info.uses_subgroup_mask = true; + break; + case IR::Opcode::VoteAll: + case IR::Opcode::VoteAny: + case IR::Opcode::VoteEqual: + case IR::Opcode::SubgroupBallot: + info.uses_subgroup_vote = true; + break; + case IR::Opcode::FSwizzleAdd: + info.uses_fswzadd = true; + break; + case IR::Opcode::DPdxFine: + case IR::Opcode::DPdyFine: + case IR::Opcode::DPdxCoarse: + case IR::Opcode::DPdyCoarse: + info.uses_derivatives = true; + break; + case IR::Opcode::LoadStorageU8: + case IR::Opcode::LoadStorageS8: + case IR::Opcode::WriteStorageU8: + case IR::Opcode::WriteStorageS8: + info.used_storage_buffer_types |= IR::Type::U8; + break; + case IR::Opcode::LoadStorageU16: + case IR::Opcode::LoadStorageS16: + case IR::Opcode::WriteStorageU16: + case IR::Opcode::WriteStorageS16: + info.used_storage_buffer_types |= IR::Type::U16; + break; + case IR::Opcode::LoadStorage32: + case IR::Opcode::WriteStorage32: + case IR::Opcode::StorageAtomicIAdd32: + case IR::Opcode::StorageAtomicUMin32: + case IR::Opcode::StorageAtomicUMax32: + case IR::Opcode::StorageAtomicAnd32: + case IR::Opcode::StorageAtomicOr32: + case IR::Opcode::StorageAtomicXor32: + case IR::Opcode::StorageAtomicExchange32: + info.used_storage_buffer_types |= IR::Type::U32; + break; + case IR::Opcode::LoadStorage64: + case IR::Opcode::WriteStorage64: + info.used_storage_buffer_types |= IR::Type::U32x2; + break; + case IR::Opcode::LoadStorage128: + case IR::Opcode::WriteStorage128: + info.used_storage_buffer_types |= IR::Type::U32x4; + break; + case IR::Opcode::SharedAtomicSMin32: + info.uses_atomic_s32_min = true; + break; + case IR::Opcode::SharedAtomicSMax32: + info.uses_atomic_s32_max = true; + break; + case IR::Opcode::SharedAtomicInc32: + info.uses_shared_increment = true; + break; + case IR::Opcode::SharedAtomicDec32: + info.uses_shared_decrement = true; + break; + case IR::Opcode::SharedAtomicExchange64: + info.uses_int64_bit_atomics = true; + break; + case IR::Opcode::GlobalAtomicInc32: + case IR::Opcode::StorageAtomicInc32: + info.used_storage_buffer_types |= IR::Type::U32; + info.uses_global_increment = true; + break; + case IR::Opcode::GlobalAtomicDec32: + case IR::Opcode::StorageAtomicDec32: + info.used_storage_buffer_types |= IR::Type::U32; + info.uses_global_decrement = true; + break; + case IR::Opcode::GlobalAtomicAddF32: + case IR::Opcode::StorageAtomicAddF32: + info.used_storage_buffer_types |= IR::Type::U32; + info.uses_atomic_f32_add = true; + break; + case IR::Opcode::GlobalAtomicAddF16x2: + case IR::Opcode::StorageAtomicAddF16x2: + info.used_storage_buffer_types |= IR::Type::U32; + info.uses_atomic_f16x2_add = true; + break; + case IR::Opcode::GlobalAtomicAddF32x2: + case IR::Opcode::StorageAtomicAddF32x2: + info.used_storage_buffer_types |= IR::Type::U32; + info.uses_atomic_f32x2_add = true; + break; + case IR::Opcode::GlobalAtomicMinF16x2: + case IR::Opcode::StorageAtomicMinF16x2: + info.used_storage_buffer_types |= IR::Type::U32; + info.uses_atomic_f16x2_min = true; + break; + case IR::Opcode::GlobalAtomicMinF32x2: + case IR::Opcode::StorageAtomicMinF32x2: + info.used_storage_buffer_types |= IR::Type::U32; + info.uses_atomic_f32x2_min = true; + break; + 
case IR::Opcode::GlobalAtomicMaxF16x2: + case IR::Opcode::StorageAtomicMaxF16x2: + info.used_storage_buffer_types |= IR::Type::U32; + info.uses_atomic_f16x2_max = true; + break; + case IR::Opcode::GlobalAtomicMaxF32x2: + case IR::Opcode::StorageAtomicMaxF32x2: + info.used_storage_buffer_types |= IR::Type::U32; + info.uses_atomic_f32x2_max = true; + break; + case IR::Opcode::StorageAtomicSMin32: + info.used_storage_buffer_types |= IR::Type::U32; + info.uses_atomic_s32_min = true; + break; + case IR::Opcode::StorageAtomicSMax32: + info.used_storage_buffer_types |= IR::Type::U32; + info.uses_atomic_s32_max = true; + break; + case IR::Opcode::GlobalAtomicIAdd64: + case IR::Opcode::GlobalAtomicSMin64: + case IR::Opcode::GlobalAtomicUMin64: + case IR::Opcode::GlobalAtomicSMax64: + case IR::Opcode::GlobalAtomicUMax64: + case IR::Opcode::GlobalAtomicAnd64: + case IR::Opcode::GlobalAtomicOr64: + case IR::Opcode::GlobalAtomicXor64: + case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::StorageAtomicIAdd64: + case IR::Opcode::StorageAtomicSMin64: + case IR::Opcode::StorageAtomicUMin64: + case IR::Opcode::StorageAtomicSMax64: + case IR::Opcode::StorageAtomicUMax64: + case IR::Opcode::StorageAtomicAnd64: + case IR::Opcode::StorageAtomicOr64: + case IR::Opcode::StorageAtomicXor64: + info.used_storage_buffer_types |= IR::Type::U64; + info.uses_int64_bit_atomics = true; + break; + case IR::Opcode::BindlessImageAtomicIAdd32: + case IR::Opcode::BindlessImageAtomicSMin32: + case IR::Opcode::BindlessImageAtomicUMin32: + case IR::Opcode::BindlessImageAtomicSMax32: + case IR::Opcode::BindlessImageAtomicUMax32: + case IR::Opcode::BindlessImageAtomicInc32: + case IR::Opcode::BindlessImageAtomicDec32: + case IR::Opcode::BindlessImageAtomicAnd32: + case IR::Opcode::BindlessImageAtomicOr32: + case IR::Opcode::BindlessImageAtomicXor32: + case IR::Opcode::BindlessImageAtomicExchange32: + case IR::Opcode::BoundImageAtomicIAdd32: + case IR::Opcode::BoundImageAtomicSMin32: + case IR::Opcode::BoundImageAtomicUMin32: + case IR::Opcode::BoundImageAtomicSMax32: + case IR::Opcode::BoundImageAtomicUMax32: + case IR::Opcode::BoundImageAtomicInc32: + case IR::Opcode::BoundImageAtomicDec32: + case IR::Opcode::BoundImageAtomicAnd32: + case IR::Opcode::BoundImageAtomicOr32: + case IR::Opcode::BoundImageAtomicXor32: + case IR::Opcode::BoundImageAtomicExchange32: + case IR::Opcode::ImageAtomicIAdd32: + case IR::Opcode::ImageAtomicSMin32: + case IR::Opcode::ImageAtomicUMin32: + case IR::Opcode::ImageAtomicSMax32: + case IR::Opcode::ImageAtomicUMax32: + case IR::Opcode::ImageAtomicInc32: + case IR::Opcode::ImageAtomicDec32: + case IR::Opcode::ImageAtomicAnd32: + case IR::Opcode::ImageAtomicOr32: + case IR::Opcode::ImageAtomicXor32: + case IR::Opcode::ImageAtomicExchange32: + info.uses_atomic_image_u32 = true; + break; + default: + break; + } +} + +void VisitFpModifiers(Info& info, IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::FPAdd16: + case IR::Opcode::FPFma16: + case IR::Opcode::FPMul16: + case IR::Opcode::FPRoundEven16: + case IR::Opcode::FPFloor16: + case IR::Opcode::FPCeil16: + case IR::Opcode::FPTrunc16: { + const auto control{inst.Flags<IR::FpControl>()}; + switch (control.fmz_mode) { + case IR::FmzMode::DontCare: + break; + case IR::FmzMode::FTZ: + case IR::FmzMode::FMZ: + info.uses_fp16_denorms_flush = true; + break; + case IR::FmzMode::None: + info.uses_fp16_denorms_preserve = true; + break; + } + break; + } + case IR::Opcode::FPAdd32: + case IR::Opcode::FPFma32: + case IR::Opcode::FPMul32: + case 
IR::Opcode::FPRoundEven32: + case IR::Opcode::FPFloor32: + case IR::Opcode::FPCeil32: + case IR::Opcode::FPTrunc32: + case IR::Opcode::FPOrdEqual32: + case IR::Opcode::FPUnordEqual32: + case IR::Opcode::FPOrdNotEqual32: + case IR::Opcode::FPUnordNotEqual32: + case IR::Opcode::FPOrdLessThan32: + case IR::Opcode::FPUnordLessThan32: + case IR::Opcode::FPOrdGreaterThan32: + case IR::Opcode::FPUnordGreaterThan32: + case IR::Opcode::FPOrdLessThanEqual32: + case IR::Opcode::FPUnordLessThanEqual32: + case IR::Opcode::FPOrdGreaterThanEqual32: + case IR::Opcode::FPUnordGreaterThanEqual32: + case IR::Opcode::ConvertF16F32: + case IR::Opcode::ConvertF64F32: { + const auto control{inst.Flags<IR::FpControl>()}; + switch (control.fmz_mode) { + case IR::FmzMode::DontCare: + break; + case IR::FmzMode::FTZ: + case IR::FmzMode::FMZ: + info.uses_fp32_denorms_flush = true; + break; + case IR::FmzMode::None: + info.uses_fp32_denorms_preserve = true; + break; + } + break; + } + default: + break; + } +} + +void VisitCbufs(Info& info, IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::GetCbufU8: + case IR::Opcode::GetCbufS8: + case IR::Opcode::GetCbufU16: + case IR::Opcode::GetCbufS16: + case IR::Opcode::GetCbufU32: + case IR::Opcode::GetCbufF32: + case IR::Opcode::GetCbufU32x2: { + CheckCBufNVN(info, inst); + break; + } + default: + break; + } +} + +void Visit(Info& info, IR::Inst& inst) { + VisitUsages(info, inst); + VisitFpModifiers(info, inst); + VisitCbufs(info, inst); +} + +void GatherInfoFromHeader(Environment& env, Info& info) { + Stage stage{env.ShaderStage()}; + if (stage == Stage::Compute) { + return; + } + const auto& header{env.SPH()}; + if (stage == Stage::Fragment) { + if (!info.loads_indexed_attributes) { + return; + } + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const size_t offset{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4}; + const auto vector{header.ps.imap_generic_vector[index]}; + info.loads.mask[offset + 0] = vector.x != PixelImap::Unused; + info.loads.mask[offset + 1] = vector.y != PixelImap::Unused; + info.loads.mask[offset + 2] = vector.z != PixelImap::Unused; + info.loads.mask[offset + 3] = vector.w != PixelImap::Unused; + } + return; + } + if (info.loads_indexed_attributes) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const IR::Attribute attribute{IR::Attribute::Generic0X + index * 4}; + const auto mask = header.vtg.InputGeneric(index); + for (size_t i = 0; i < 4; ++i) { + info.loads.Set(attribute + i, mask[i]); + } + } + for (size_t index = 0; index < 8; ++index) { + const u16 mask{header.vtg.clip_distances}; + info.loads.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0); + } + info.loads.Set(IR::Attribute::PrimitiveId, header.vtg.imap_systemb.primitive_array_id != 0); + info.loads.Set(IR::Attribute::Layer, header.vtg.imap_systemb.rt_array_index != 0); + info.loads.Set(IR::Attribute::ViewportIndex, header.vtg.imap_systemb.viewport_index != 0); + info.loads.Set(IR::Attribute::PointSize, header.vtg.imap_systemb.point_size != 0); + info.loads.Set(IR::Attribute::PositionX, header.vtg.imap_systemb.position_x != 0); + info.loads.Set(IR::Attribute::PositionY, header.vtg.imap_systemb.position_y != 0); + info.loads.Set(IR::Attribute::PositionZ, header.vtg.imap_systemb.position_z != 0); + info.loads.Set(IR::Attribute::PositionW, header.vtg.imap_systemb.position_w != 0); + info.loads.Set(IR::Attribute::PointSpriteS, header.vtg.point_sprite_s != 0); + info.loads.Set(IR::Attribute::PointSpriteT, 
header.vtg.point_sprite_t != 0); + info.loads.Set(IR::Attribute::FogCoordinate, header.vtg.fog_coordinate != 0); + info.loads.Set(IR::Attribute::TessellationEvaluationPointU, + header.vtg.tessellation_eval_point_u != 0); + info.loads.Set(IR::Attribute::TessellationEvaluationPointV, + header.vtg.tessellation_eval_point_v != 0); + info.loads.Set(IR::Attribute::InstanceId, header.vtg.instance_id != 0); + info.loads.Set(IR::Attribute::VertexId, header.vtg.vertex_id != 0); + // TODO: Legacy varyings + } + if (info.stores_indexed_attributes) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const IR::Attribute attribute{IR::Attribute::Generic0X + index * 4}; + const auto mask{header.vtg.OutputGeneric(index)}; + for (size_t i = 0; i < 4; ++i) { + info.stores.Set(attribute + i, mask[i]); + } + } + for (size_t index = 0; index < 8; ++index) { + const u16 mask{header.vtg.omap_systemc.clip_distances}; + info.stores.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0); + } + info.stores.Set(IR::Attribute::PrimitiveId, + header.vtg.omap_systemb.primitive_array_id != 0); + info.stores.Set(IR::Attribute::Layer, header.vtg.omap_systemb.rt_array_index != 0); + info.stores.Set(IR::Attribute::ViewportIndex, header.vtg.omap_systemb.viewport_index != 0); + info.stores.Set(IR::Attribute::PointSize, header.vtg.omap_systemb.point_size != 0); + info.stores.Set(IR::Attribute::PositionX, header.vtg.omap_systemb.position_x != 0); + info.stores.Set(IR::Attribute::PositionY, header.vtg.omap_systemb.position_y != 0); + info.stores.Set(IR::Attribute::PositionZ, header.vtg.omap_systemb.position_z != 0); + info.stores.Set(IR::Attribute::PositionW, header.vtg.omap_systemb.position_w != 0); + info.stores.Set(IR::Attribute::PointSpriteS, header.vtg.omap_systemc.point_sprite_s != 0); + info.stores.Set(IR::Attribute::PointSpriteT, header.vtg.omap_systemc.point_sprite_t != 0); + info.stores.Set(IR::Attribute::FogCoordinate, header.vtg.omap_systemc.fog_coordinate != 0); + info.stores.Set(IR::Attribute::TessellationEvaluationPointU, + header.vtg.omap_systemc.tessellation_eval_point_u != 0); + info.stores.Set(IR::Attribute::TessellationEvaluationPointV, + header.vtg.omap_systemc.tessellation_eval_point_v != 0); + info.stores.Set(IR::Attribute::InstanceId, header.vtg.omap_systemc.instance_id != 0); + info.stores.Set(IR::Attribute::VertexId, header.vtg.omap_systemc.vertex_id != 0); + // TODO: Legacy varyings + } +} +} // Anonymous namespace + +void CollectShaderInfoPass(Environment& env, IR::Program& program) { + Info& info{program.info}; + const u32 base{[&] { + switch (program.stage) { + case Stage::VertexA: + case Stage::VertexB: + return 0x110u; + case Stage::TessellationControl: + return 0x210u; + case Stage::TessellationEval: + return 0x310u; + case Stage::Geometry: + return 0x410u; + case Stage::Fragment: + return 0x510u; + case Stage::Compute: + return 0x310u; + } + throw InvalidArgument("Invalid stage {}", program.stage); + }()}; + info.nvn_buffer_base = base; + + for (IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + Visit(info, inst); + } + } + GatherInfoFromHeader(env, info); +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp new file mode 100644 index 000000000..8dd6d6c2c --- /dev/null +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -0,0 +1,610 @@ +// Copyright 2021 yuzu Emulator Project +// 
Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <tuple> +#include <type_traits> + +#include "common/bit_cast.h" +#include "common/bit_util.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { +namespace { +// Metaprogramming stuff to get arguments information out of a lambda +template <typename Func> +struct LambdaTraits : LambdaTraits<decltype(&std::remove_reference_t<Func>::operator())> {}; + +template <typename ReturnType, typename LambdaType, typename... Args> +struct LambdaTraits<ReturnType (LambdaType::*)(Args...) const> { + template <size_t I> + using ArgType = std::tuple_element_t<I, std::tuple<Args...>>; + + static constexpr size_t NUM_ARGS{sizeof...(Args)}; +}; + +template <typename T> +[[nodiscard]] T Arg(const IR::Value& value) { + if constexpr (std::is_same_v<T, bool>) { + return value.U1(); + } else if constexpr (std::is_same_v<T, u32>) { + return value.U32(); + } else if constexpr (std::is_same_v<T, s32>) { + return static_cast<s32>(value.U32()); + } else if constexpr (std::is_same_v<T, f32>) { + return value.F32(); + } else if constexpr (std::is_same_v<T, u64>) { + return value.U64(); + } +} + +template <typename T, typename ImmFn> +bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { + const IR::Value lhs{inst.Arg(0)}; + const IR::Value rhs{inst.Arg(1)}; + + const bool is_lhs_immediate{lhs.IsImmediate()}; + const bool is_rhs_immediate{rhs.IsImmediate()}; + + if (is_lhs_immediate && is_rhs_immediate) { + const auto result{imm_fn(Arg<T>(lhs), Arg<T>(rhs))}; + inst.ReplaceUsesWith(IR::Value{result}); + return false; + } + if (is_lhs_immediate && !is_rhs_immediate) { + IR::Inst* const rhs_inst{rhs.InstRecursive()}; + if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->Arg(1).IsImmediate()) { + const auto combined{imm_fn(Arg<T>(lhs), Arg<T>(rhs_inst->Arg(1)))}; + inst.SetArg(0, rhs_inst->Arg(0)); + inst.SetArg(1, IR::Value{combined}); + } else { + // Normalize + inst.SetArg(0, rhs); + inst.SetArg(1, lhs); + } + } + if (!is_lhs_immediate && is_rhs_immediate) { + const IR::Inst* const lhs_inst{lhs.InstRecursive()}; + if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->Arg(1).IsImmediate()) { + const auto combined{imm_fn(Arg<T>(rhs), Arg<T>(lhs_inst->Arg(1)))}; + inst.SetArg(0, lhs_inst->Arg(0)); + inst.SetArg(1, IR::Value{combined}); + } + } + return true; +} + +template <typename Func> +bool FoldWhenAllImmediates(IR::Inst& inst, Func&& func) { + if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) { + return false; + } + using Indices = std::make_index_sequence<LambdaTraits<decltype(func)>::NUM_ARGS>; + inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{})); + return true; +} + +void FoldGetRegister(IR::Inst& inst) { + if (inst.Arg(0).Reg() == IR::Reg::RZ) { + inst.ReplaceUsesWith(IR::Value{u32{0}}); + } +} + +void FoldGetPred(IR::Inst& inst) { + if (inst.Arg(0).Pred() == IR::Pred::PT) { + inst.ReplaceUsesWith(IR::Value{true}); + } +} + +/// Replaces the pattern generated by two XMAD multiplications +bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) { + /* + * We are looking for this pattern: + * %rhs_bfe = BitFieldUExtract %factor_a, #0, #16 + * %rhs_mul = IMul32 %rhs_bfe, %factor_b + * %lhs_bfe = BitFieldUExtract %factor_a, #16, #16 + * %rhs_mul = IMul32 %lhs_bfe, %factor_b + * 
%lhs_shl = ShiftLeftLogical32 %rhs_mul, #16 + * %result = IAdd32 %lhs_shl, %rhs_mul + * + * And replacing it with + * %result = IMul32 %factor_a, %factor_b + * + * This optimization has been proven safe by LLVM and MSVC. + */ + const IR::Value lhs_arg{inst.Arg(0)}; + const IR::Value rhs_arg{inst.Arg(1)}; + if (lhs_arg.IsImmediate() || rhs_arg.IsImmediate()) { + return false; + } + IR::Inst* const lhs_shl{lhs_arg.InstRecursive()}; + if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 || + lhs_shl->Arg(1) != IR::Value{16U}) { + return false; + } + if (lhs_shl->Arg(0).IsImmediate()) { + return false; + } + IR::Inst* const lhs_mul{lhs_shl->Arg(0).InstRecursive()}; + IR::Inst* const rhs_mul{rhs_arg.InstRecursive()}; + if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 || rhs_mul->GetOpcode() != IR::Opcode::IMul32) { + return false; + } + if (lhs_mul->Arg(1).Resolve() != rhs_mul->Arg(1).Resolve()) { + return false; + } + const IR::U32 factor_b{lhs_mul->Arg(1)}; + if (lhs_mul->Arg(0).IsImmediate() || rhs_mul->Arg(0).IsImmediate()) { + return false; + } + IR::Inst* const lhs_bfe{lhs_mul->Arg(0).InstRecursive()}; + IR::Inst* const rhs_bfe{rhs_mul->Arg(0).InstRecursive()}; + if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) { + return false; + } + if (rhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) { + return false; + } + if (lhs_bfe->Arg(1) != IR::Value{16U} || lhs_bfe->Arg(2) != IR::Value{16U}) { + return false; + } + if (rhs_bfe->Arg(1) != IR::Value{0U} || rhs_bfe->Arg(2) != IR::Value{16U}) { + return false; + } + if (lhs_bfe->Arg(0).Resolve() != rhs_bfe->Arg(0).Resolve()) { + return false; + } + const IR::U32 factor_a{lhs_bfe->Arg(0)}; + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + inst.ReplaceUsesWith(ir.IMul(factor_a, factor_b)); + return true; +} + +template <typename T> +void FoldAdd(IR::Block& block, IR::Inst& inst) { + if (inst.HasAssociatedPseudoOperation()) { + return; + } + if (!FoldCommutative<T>(inst, [](T a, T b) { return a + b; })) { + return; + } + const IR::Value rhs{inst.Arg(1)}; + if (rhs.IsImmediate() && Arg<T>(rhs) == 0) { + inst.ReplaceUsesWith(inst.Arg(0)); + return; + } + if constexpr (std::is_same_v<T, u32>) { + if (FoldXmadMultiply(block, inst)) { + return; + } + } +} + +void FoldISub32(IR::Inst& inst) { + if (FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a - b; })) { + return; + } + if (inst.Arg(0).IsImmediate() || inst.Arg(1).IsImmediate()) { + return; + } + // ISub32 is generally used to subtract two constant buffers, compare and replace this with + // zero if they equal. 
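+    // Illustrative example of the folds below (value names are hypothetical):
+    //   %a = GetCbufU32 #0, #0x110
+    //   %b = GetCbufU32 #0, #0x110
+    //   ISub32 %a, %b              -> folded to #0
+    //   ISub32 (IAdd32 %x, %a), %a -> folded to %x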
+ const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) { + return a->GetOpcode() == IR::Opcode::GetCbufU32 && + b->GetOpcode() == IR::Opcode::GetCbufU32 && a->Arg(0) == b->Arg(0) && + a->Arg(1) == b->Arg(1); + }}; + IR::Inst* op_a{inst.Arg(0).InstRecursive()}; + IR::Inst* op_b{inst.Arg(1).InstRecursive()}; + if (equal_cbuf(op_a, op_b)) { + inst.ReplaceUsesWith(IR::Value{u32{0}}); + return; + } + // It's also possible a value is being added to a cbuf and then subtracted + if (op_b->GetOpcode() == IR::Opcode::IAdd32) { + // Canonicalize local variables to simplify the following logic + std::swap(op_a, op_b); + } + if (op_b->GetOpcode() != IR::Opcode::GetCbufU32) { + return; + } + IR::Inst* const inst_cbuf{op_b}; + if (op_a->GetOpcode() != IR::Opcode::IAdd32) { + return; + } + IR::Value add_op_a{op_a->Arg(0)}; + IR::Value add_op_b{op_a->Arg(1)}; + if (add_op_b.IsImmediate()) { + // Canonicalize + std::swap(add_op_a, add_op_b); + } + if (add_op_b.IsImmediate()) { + return; + } + IR::Inst* const add_cbuf{add_op_b.InstRecursive()}; + if (equal_cbuf(add_cbuf, inst_cbuf)) { + inst.ReplaceUsesWith(add_op_a); + } +} + +void FoldSelect(IR::Inst& inst) { + const IR::Value cond{inst.Arg(0)}; + if (cond.IsImmediate()) { + inst.ReplaceUsesWith(cond.U1() ? inst.Arg(1) : inst.Arg(2)); + } +} + +void FoldFPMul32(IR::Inst& inst) { + const auto control{inst.Flags<IR::FpControl>()}; + if (control.no_contraction) { + return; + } + // Fold interpolation operations + const IR::Value lhs_value{inst.Arg(0)}; + const IR::Value rhs_value{inst.Arg(1)}; + if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) { + return; + } + IR::Inst* const lhs_op{lhs_value.InstRecursive()}; + IR::Inst* const rhs_op{rhs_value.InstRecursive()}; + if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 || + rhs_op->GetOpcode() != IR::Opcode::FPRecip32) { + return; + } + const IR::Value recip_source{rhs_op->Arg(0)}; + const IR::Value lhs_mul_source{lhs_op->Arg(1).Resolve()}; + if (recip_source.IsImmediate() || lhs_mul_source.IsImmediate()) { + return; + } + IR::Inst* const attr_a{recip_source.InstRecursive()}; + IR::Inst* const attr_b{lhs_mul_source.InstRecursive()}; + if (attr_a->GetOpcode() != IR::Opcode::GetAttribute || + attr_b->GetOpcode() != IR::Opcode::GetAttribute) { + return; + } + if (attr_a->Arg(0).Attribute() == attr_b->Arg(0).Attribute()) { + inst.ReplaceUsesWith(lhs_op->Arg(0)); + } +} + +void FoldLogicalAnd(IR::Inst& inst) { + if (!FoldCommutative<bool>(inst, [](bool a, bool b) { return a && b; })) { + return; + } + const IR::Value rhs{inst.Arg(1)}; + if (rhs.IsImmediate()) { + if (rhs.U1()) { + inst.ReplaceUsesWith(inst.Arg(0)); + } else { + inst.ReplaceUsesWith(IR::Value{false}); + } + } +} + +void FoldLogicalOr(IR::Inst& inst) { + if (!FoldCommutative<bool>(inst, [](bool a, bool b) { return a || b; })) { + return; + } + const IR::Value rhs{inst.Arg(1)}; + if (rhs.IsImmediate()) { + if (rhs.U1()) { + inst.ReplaceUsesWith(IR::Value{true}); + } else { + inst.ReplaceUsesWith(inst.Arg(0)); + } + } +} + +void FoldLogicalNot(IR::Inst& inst) { + const IR::U1 value{inst.Arg(0)}; + if (value.IsImmediate()) { + inst.ReplaceUsesWith(IR::Value{!value.U1()}); + return; + } + IR::Inst* const arg{value.InstRecursive()}; + if (arg->GetOpcode() == IR::Opcode::LogicalNot) { + inst.ReplaceUsesWith(arg->Arg(0)); + } +} + +template <IR::Opcode op, typename Dest, typename Source> +void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { + const IR::Value value{inst.Arg(0)}; + if (value.IsImmediate()) { + 
inst.ReplaceUsesWith(IR::Value{Common::BitCast<Dest>(Arg<Source>(value))}); + return; + } + IR::Inst* const arg_inst{value.InstRecursive()}; + if (arg_inst->GetOpcode() == reverse) { + inst.ReplaceUsesWith(arg_inst->Arg(0)); + return; + } + if constexpr (op == IR::Opcode::BitCastF32U32) { + if (arg_inst->GetOpcode() == IR::Opcode::GetCbufU32) { + // Replace the bitcast with a typed constant buffer read + inst.ReplaceOpcode(IR::Opcode::GetCbufF32); + inst.SetArg(0, arg_inst->Arg(0)); + inst.SetArg(1, arg_inst->Arg(1)); + return; + } + } +} + +void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) { + const IR::Value value{inst.Arg(0)}; + if (value.IsImmediate()) { + return; + } + IR::Inst* const arg_inst{value.InstRecursive()}; + if (arg_inst->GetOpcode() == reverse) { + inst.ReplaceUsesWith(arg_inst->Arg(0)); + return; + } +} + +template <typename Func, size_t... I> +IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence<I...>) { + using Traits = LambdaTraits<decltype(func)>; + return IR::Value{func(Arg<typename Traits::template ArgType<I>>(inst.Arg(I))...)}; +} + +std::optional<IR::Value> FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert, + IR::Opcode construct, u32 first_index) { + IR::Inst* const inst{inst_value.InstRecursive()}; + if (inst->GetOpcode() == construct) { + return inst->Arg(first_index); + } + if (inst->GetOpcode() != insert) { + return std::nullopt; + } + IR::Value value_index{inst->Arg(2)}; + if (!value_index.IsImmediate()) { + return std::nullopt; + } + const u32 second_index{value_index.U32()}; + if (first_index != second_index) { + IR::Value value_composite{inst->Arg(0)}; + if (value_composite.IsImmediate()) { + return std::nullopt; + } + return FoldCompositeExtractImpl(value_composite, insert, construct, first_index); + } + return inst->Arg(1); +} + +void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode insert) { + const IR::Value value_1{inst.Arg(0)}; + const IR::Value value_2{inst.Arg(1)}; + if (value_1.IsImmediate()) { + return; + } + if (!value_2.IsImmediate()) { + return; + } + const u32 first_index{value_2.U32()}; + const std::optional result{FoldCompositeExtractImpl(value_1, insert, construct, first_index)}; + if (!result) { + return; + } + inst.ReplaceUsesWith(*result); +} + +IR::Value GetThroughCast(IR::Value value, IR::Opcode expected_cast) { + if (value.IsImmediate()) { + return value; + } + IR::Inst* const inst{value.InstRecursive()}; + if (inst->GetOpcode() == expected_cast) { + return inst->Arg(0).Resolve(); + } + return value; +} + +void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) { + const IR::Value swizzle{inst.Arg(2)}; + if (!swizzle.IsImmediate()) { + return; + } + const IR::Value value_1{GetThroughCast(inst.Arg(0).Resolve(), IR::Opcode::BitCastF32U32)}; + const IR::Value value_2{GetThroughCast(inst.Arg(1).Resolve(), IR::Opcode::BitCastF32U32)}; + if (value_1.IsImmediate()) { + return; + } + const u32 swizzle_value{swizzle.U32()}; + if (swizzle_value != 0x99 && swizzle_value != 0xA5) { + return; + } + IR::Inst* const inst2{value_1.InstRecursive()}; + if (inst2->GetOpcode() != IR::Opcode::ShuffleButterfly) { + return; + } + const IR::Value value_3{GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32)}; + if (value_2 != value_3) { + return; + } + const IR::Value index{inst2->Arg(1)}; + const IR::Value clamp{inst2->Arg(2)}; + const IR::Value segmentation_mask{inst2->Arg(3)}; + if (!index.IsImmediate() || !clamp.IsImmediate() || !segmentation_mask.IsImmediate()) { + 
return; + } + if (clamp.U32() != 3 || segmentation_mask.U32() != 28) { + return; + } + if (swizzle_value == 0x99) { + // DPdxFine + if (index.U32() == 1) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + inst.ReplaceUsesWith(ir.DPdxFine(IR::F32{inst.Arg(1)})); + } + } else if (swizzle_value == 0xA5) { + // DPdyFine + if (index.U32() == 2) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + inst.ReplaceUsesWith(ir.DPdyFine(IR::F32{inst.Arg(1)})); + } + } +} + +void ConstantPropagation(IR::Block& block, IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::GetRegister: + return FoldGetRegister(inst); + case IR::Opcode::GetPred: + return FoldGetPred(inst); + case IR::Opcode::IAdd32: + return FoldAdd<u32>(block, inst); + case IR::Opcode::ISub32: + return FoldISub32(inst); + case IR::Opcode::IMul32: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; }); + return; + case IR::Opcode::ShiftRightArithmetic32: + FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return static_cast<u32>(a >> b); }); + return; + case IR::Opcode::BitCastF32U32: + return FoldBitCast<IR::Opcode::BitCastF32U32, f32, u32>(inst, IR::Opcode::BitCastU32F32); + case IR::Opcode::BitCastU32F32: + return FoldBitCast<IR::Opcode::BitCastU32F32, u32, f32>(inst, IR::Opcode::BitCastF32U32); + case IR::Opcode::IAdd64: + return FoldAdd<u64>(block, inst); + case IR::Opcode::PackHalf2x16: + return FoldInverseFunc(inst, IR::Opcode::UnpackHalf2x16); + case IR::Opcode::UnpackHalf2x16: + return FoldInverseFunc(inst, IR::Opcode::PackHalf2x16); + case IR::Opcode::SelectU1: + case IR::Opcode::SelectU8: + case IR::Opcode::SelectU16: + case IR::Opcode::SelectU32: + case IR::Opcode::SelectU64: + case IR::Opcode::SelectF16: + case IR::Opcode::SelectF32: + case IR::Opcode::SelectF64: + return FoldSelect(inst); + case IR::Opcode::FPMul32: + return FoldFPMul32(inst); + case IR::Opcode::LogicalAnd: + return FoldLogicalAnd(inst); + case IR::Opcode::LogicalOr: + return FoldLogicalOr(inst); + case IR::Opcode::LogicalNot: + return FoldLogicalNot(inst); + case IR::Opcode::SLessThan: + FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; }); + return; + case IR::Opcode::ULessThan: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; }); + return; + case IR::Opcode::SLessThanEqual: + FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a <= b; }); + return; + case IR::Opcode::ULessThanEqual: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a <= b; }); + return; + case IR::Opcode::SGreaterThan: + FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a > b; }); + return; + case IR::Opcode::UGreaterThan: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a > b; }); + return; + case IR::Opcode::SGreaterThanEqual: + FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a >= b; }); + return; + case IR::Opcode::UGreaterThanEqual: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a >= b; }); + return; + case IR::Opcode::IEqual: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a == b; }); + return; + case IR::Opcode::INotEqual: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a != b; }); + return; + case IR::Opcode::BitwiseAnd32: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a & b; }); + return; + case IR::Opcode::BitwiseOr32: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a | b; }); + return; + case IR::Opcode::BitwiseXor32: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a ^ b; }); + return; + case 
IR::Opcode::BitFieldUExtract: + FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) { + if (static_cast<size_t>(shift) + static_cast<size_t>(count) > 32) { + throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract, + base, shift, count); + } + return (base >> shift) & ((1U << count) - 1); + }); + return; + case IR::Opcode::BitFieldSExtract: + FoldWhenAllImmediates(inst, [](s32 base, u32 shift, u32 count) { + const size_t back_shift{static_cast<size_t>(shift) + static_cast<size_t>(count)}; + const size_t left_shift{32 - back_shift}; + const size_t right_shift{static_cast<size_t>(32 - count)}; + if (back_shift > 32 || left_shift >= 32 || right_shift >= 32) { + throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract, + base, shift, count); + } + return static_cast<u32>((base << left_shift) >> right_shift); + }); + return; + case IR::Opcode::BitFieldInsert: + FoldWhenAllImmediates(inst, [](u32 base, u32 insert, u32 offset, u32 bits) { + if (bits >= 32 || offset >= 32) { + throw LogicError("Undefined result in {}({}, {}, {}, {})", + IR::Opcode::BitFieldInsert, base, insert, offset, bits); + } + return (base & ~(~(~0u << bits) << offset)) | (insert << offset); + }); + return; + case IR::Opcode::CompositeExtractU32x2: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x2, + IR::Opcode::CompositeInsertU32x2); + case IR::Opcode::CompositeExtractU32x3: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x3, + IR::Opcode::CompositeInsertU32x3); + case IR::Opcode::CompositeExtractU32x4: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x4, + IR::Opcode::CompositeInsertU32x4); + case IR::Opcode::CompositeExtractF32x2: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x2, + IR::Opcode::CompositeInsertF32x2); + case IR::Opcode::CompositeExtractF32x3: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x3, + IR::Opcode::CompositeInsertF32x3); + case IR::Opcode::CompositeExtractF32x4: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x4, + IR::Opcode::CompositeInsertF32x4); + case IR::Opcode::CompositeExtractF16x2: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x2, + IR::Opcode::CompositeInsertF16x2); + case IR::Opcode::CompositeExtractF16x3: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x3, + IR::Opcode::CompositeInsertF16x3); + case IR::Opcode::CompositeExtractF16x4: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x4, + IR::Opcode::CompositeInsertF16x4); + case IR::Opcode::FSwizzleAdd: + return FoldFSwizzleAdd(block, inst); + default: + break; + } +} +} // Anonymous namespace + +void ConstantPropagationPass(IR::Program& program) { + const auto end{program.post_order_blocks.rend()}; + for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) { + IR::Block* const block{*it}; + for (IR::Inst& inst : block->Instructions()) { + ConstantPropagation(*block, inst); + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp new file mode 100644 index 000000000..400836301 --- /dev/null +++ b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp @@ -0,0 +1,26 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
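+
+// Removes instructions that have no uses and no observable side effects; each
+// block's instruction list is walked in reverse so that erasing an instruction
+// can expose further dead instructions before it.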
+ +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { + +void DeadCodeEliminationPass(IR::Program& program) { + // We iterate over the instructions in reverse order. + // This is because removing an instruction reduces the number of uses for earlier instructions. + for (IR::Block* const block : program.post_order_blocks) { + auto it{block->end()}; + while (it != block->begin()) { + --it; + if (!it->HasUses() && !it->MayHaveSideEffects()) { + it->Invalidate(); + it = block->Instructions().erase(it); + } + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp new file mode 100644 index 000000000..055ba9c54 --- /dev/null +++ b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp @@ -0,0 +1,30 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { + +void VertexATransformPass(IR::Program& program) { + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::Epilogue) { + return inst.Invalidate(); + } + } + } +} + +void VertexBTransformPass(IR::Program& program) { + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::Prologue) { + return inst.Invalidate(); + } + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp new file mode 100644 index 000000000..4197b0095 --- /dev/null +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -0,0 +1,526 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <algorithm> +#include <compare> +#include <optional> +#include <queue> + +#include <boost/container/flat_set.hpp> +#include <boost/container/small_vector.hpp> + +#include "common/alignment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/breadth_first_search.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { +namespace { +/// Address in constant buffers to the storage buffer descriptor +struct StorageBufferAddr { + auto operator<=>(const StorageBufferAddr&) const noexcept = default; + + u32 index; + u32 offset; +}; + +/// Block iterator to a global memory instruction and the storage buffer it uses +struct StorageInst { + StorageBufferAddr storage_buffer; + IR::Inst* inst; + IR::Block* block; +}; + +/// Bias towards a certain range of constant buffers when looking for storage buffers +struct Bias { + u32 index; + u32 offset_begin; + u32 offset_end; +}; + +using boost::container::flat_set; +using boost::container::small_vector; +using StorageBufferSet = + flat_set<StorageBufferAddr, std::less<StorageBufferAddr>, small_vector<StorageBufferAddr, 16>>; +using StorageInstVector = small_vector<StorageInst, 24>; +using StorageWritesSet = + flat_set<StorageBufferAddr, std::less<StorageBufferAddr>, small_vector<StorageBufferAddr, 16>>; + +struct StorageInfo { + StorageBufferSet set; + StorageInstVector to_replace; + StorageWritesSet writes; +}; + +/// Returns true when the instruction is a global memory instruction +bool IsGlobalMemory(const IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::LoadGlobalS8: + case IR::Opcode::LoadGlobalU8: + case IR::Opcode::LoadGlobalS16: + case IR::Opcode::LoadGlobalU16: + case IR::Opcode::LoadGlobal32: + case IR::Opcode::LoadGlobal64: + case IR::Opcode::LoadGlobal128: + case IR::Opcode::WriteGlobalS8: + case IR::Opcode::WriteGlobalU8: + case IR::Opcode::WriteGlobalS16: + case IR::Opcode::WriteGlobalU16: + case IR::Opcode::WriteGlobal32: + case IR::Opcode::WriteGlobal64: + case IR::Opcode::WriteGlobal128: + case IR::Opcode::GlobalAtomicIAdd32: + case IR::Opcode::GlobalAtomicSMin32: + case IR::Opcode::GlobalAtomicUMin32: + case IR::Opcode::GlobalAtomicSMax32: + case IR::Opcode::GlobalAtomicUMax32: + case IR::Opcode::GlobalAtomicInc32: + case IR::Opcode::GlobalAtomicDec32: + case IR::Opcode::GlobalAtomicAnd32: + case IR::Opcode::GlobalAtomicOr32: + case IR::Opcode::GlobalAtomicXor32: + case IR::Opcode::GlobalAtomicExchange32: + case IR::Opcode::GlobalAtomicIAdd64: + case IR::Opcode::GlobalAtomicSMin64: + case IR::Opcode::GlobalAtomicUMin64: + case IR::Opcode::GlobalAtomicSMax64: + case IR::Opcode::GlobalAtomicUMax64: + case IR::Opcode::GlobalAtomicAnd64: + case IR::Opcode::GlobalAtomicOr64: + case IR::Opcode::GlobalAtomicXor64: + case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::GlobalAtomicAddF32: + case IR::Opcode::GlobalAtomicAddF16x2: + case IR::Opcode::GlobalAtomicAddF32x2: + case IR::Opcode::GlobalAtomicMinF16x2: + case IR::Opcode::GlobalAtomicMinF32x2: + case IR::Opcode::GlobalAtomicMaxF16x2: + case IR::Opcode::GlobalAtomicMaxF32x2: + return true; + default: + return false; + } +} + +/// Returns true when the instruction is a global memory instruction +bool IsGlobalMemoryWrite(const IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::WriteGlobalS8: + case IR::Opcode::WriteGlobalU8: + case IR::Opcode::WriteGlobalS16: + case 
IR::Opcode::WriteGlobalU16: + case IR::Opcode::WriteGlobal32: + case IR::Opcode::WriteGlobal64: + case IR::Opcode::WriteGlobal128: + case IR::Opcode::GlobalAtomicIAdd32: + case IR::Opcode::GlobalAtomicSMin32: + case IR::Opcode::GlobalAtomicUMin32: + case IR::Opcode::GlobalAtomicSMax32: + case IR::Opcode::GlobalAtomicUMax32: + case IR::Opcode::GlobalAtomicInc32: + case IR::Opcode::GlobalAtomicDec32: + case IR::Opcode::GlobalAtomicAnd32: + case IR::Opcode::GlobalAtomicOr32: + case IR::Opcode::GlobalAtomicXor32: + case IR::Opcode::GlobalAtomicExchange32: + case IR::Opcode::GlobalAtomicIAdd64: + case IR::Opcode::GlobalAtomicSMin64: + case IR::Opcode::GlobalAtomicUMin64: + case IR::Opcode::GlobalAtomicSMax64: + case IR::Opcode::GlobalAtomicUMax64: + case IR::Opcode::GlobalAtomicAnd64: + case IR::Opcode::GlobalAtomicOr64: + case IR::Opcode::GlobalAtomicXor64: + case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::GlobalAtomicAddF32: + case IR::Opcode::GlobalAtomicAddF16x2: + case IR::Opcode::GlobalAtomicAddF32x2: + case IR::Opcode::GlobalAtomicMinF16x2: + case IR::Opcode::GlobalAtomicMinF32x2: + case IR::Opcode::GlobalAtomicMaxF16x2: + case IR::Opcode::GlobalAtomicMaxF32x2: + return true; + default: + return false; + } +} + +/// Converts a global memory opcode to its storage buffer equivalent +IR::Opcode GlobalToStorage(IR::Opcode opcode) { + switch (opcode) { + case IR::Opcode::LoadGlobalS8: + return IR::Opcode::LoadStorageS8; + case IR::Opcode::LoadGlobalU8: + return IR::Opcode::LoadStorageU8; + case IR::Opcode::LoadGlobalS16: + return IR::Opcode::LoadStorageS16; + case IR::Opcode::LoadGlobalU16: + return IR::Opcode::LoadStorageU16; + case IR::Opcode::LoadGlobal32: + return IR::Opcode::LoadStorage32; + case IR::Opcode::LoadGlobal64: + return IR::Opcode::LoadStorage64; + case IR::Opcode::LoadGlobal128: + return IR::Opcode::LoadStorage128; + case IR::Opcode::WriteGlobalS8: + return IR::Opcode::WriteStorageS8; + case IR::Opcode::WriteGlobalU8: + return IR::Opcode::WriteStorageU8; + case IR::Opcode::WriteGlobalS16: + return IR::Opcode::WriteStorageS16; + case IR::Opcode::WriteGlobalU16: + return IR::Opcode::WriteStorageU16; + case IR::Opcode::WriteGlobal32: + return IR::Opcode::WriteStorage32; + case IR::Opcode::WriteGlobal64: + return IR::Opcode::WriteStorage64; + case IR::Opcode::WriteGlobal128: + return IR::Opcode::WriteStorage128; + case IR::Opcode::GlobalAtomicIAdd32: + return IR::Opcode::StorageAtomicIAdd32; + case IR::Opcode::GlobalAtomicSMin32: + return IR::Opcode::StorageAtomicSMin32; + case IR::Opcode::GlobalAtomicUMin32: + return IR::Opcode::StorageAtomicUMin32; + case IR::Opcode::GlobalAtomicSMax32: + return IR::Opcode::StorageAtomicSMax32; + case IR::Opcode::GlobalAtomicUMax32: + return IR::Opcode::StorageAtomicUMax32; + case IR::Opcode::GlobalAtomicInc32: + return IR::Opcode::StorageAtomicInc32; + case IR::Opcode::GlobalAtomicDec32: + return IR::Opcode::StorageAtomicDec32; + case IR::Opcode::GlobalAtomicAnd32: + return IR::Opcode::StorageAtomicAnd32; + case IR::Opcode::GlobalAtomicOr32: + return IR::Opcode::StorageAtomicOr32; + case IR::Opcode::GlobalAtomicXor32: + return IR::Opcode::StorageAtomicXor32; + case IR::Opcode::GlobalAtomicIAdd64: + return IR::Opcode::StorageAtomicIAdd64; + case IR::Opcode::GlobalAtomicSMin64: + return IR::Opcode::StorageAtomicSMin64; + case IR::Opcode::GlobalAtomicUMin64: + return IR::Opcode::StorageAtomicUMin64; + case IR::Opcode::GlobalAtomicSMax64: + return IR::Opcode::StorageAtomicSMax64; + case IR::Opcode::GlobalAtomicUMax64: + return 
IR::Opcode::StorageAtomicUMax64; + case IR::Opcode::GlobalAtomicAnd64: + return IR::Opcode::StorageAtomicAnd64; + case IR::Opcode::GlobalAtomicOr64: + return IR::Opcode::StorageAtomicOr64; + case IR::Opcode::GlobalAtomicXor64: + return IR::Opcode::StorageAtomicXor64; + case IR::Opcode::GlobalAtomicExchange32: + return IR::Opcode::StorageAtomicExchange32; + case IR::Opcode::GlobalAtomicExchange64: + return IR::Opcode::StorageAtomicExchange64; + case IR::Opcode::GlobalAtomicAddF32: + return IR::Opcode::StorageAtomicAddF32; + case IR::Opcode::GlobalAtomicAddF16x2: + return IR::Opcode::StorageAtomicAddF16x2; + case IR::Opcode::GlobalAtomicMinF16x2: + return IR::Opcode::StorageAtomicMinF16x2; + case IR::Opcode::GlobalAtomicMaxF16x2: + return IR::Opcode::StorageAtomicMaxF16x2; + case IR::Opcode::GlobalAtomicAddF32x2: + return IR::Opcode::StorageAtomicAddF32x2; + case IR::Opcode::GlobalAtomicMinF32x2: + return IR::Opcode::StorageAtomicMinF32x2; + case IR::Opcode::GlobalAtomicMaxF32x2: + return IR::Opcode::StorageAtomicMaxF32x2; + default: + throw InvalidArgument("Invalid global memory opcode {}", opcode); + } +} + +/// Returns true when a storage buffer address satisfies a bias +bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexcept { + return storage_buffer.index == bias.index && storage_buffer.offset >= bias.offset_begin && + storage_buffer.offset < bias.offset_end; +} + +struct LowAddrInfo { + IR::U32 value; + s32 imm_offset; +}; + +/// Tries to track the first 32-bits of a global memory instruction +std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) { + // The first argument is the low level GPU pointer to the global memory instruction + const IR::Value addr{inst->Arg(0)}; + if (addr.IsImmediate()) { + // Not much we can do if it's an immediate + return std::nullopt; + } + // This address is expected to either be a PackUint2x32, a IAdd64, or a CompositeConstructU32x2 + IR::Inst* addr_inst{addr.InstRecursive()}; + s32 imm_offset{0}; + if (addr_inst->GetOpcode() == IR::Opcode::IAdd64) { + // If it's an IAdd64, get the immediate offset it is applying and grab the address + // instruction. This expects for the instruction to be canonicalized having the address on + // the first argument and the immediate offset on the second one. + const IR::U64 imm_offset_value{addr_inst->Arg(1)}; + if (!imm_offset_value.IsImmediate()) { + return std::nullopt; + } + imm_offset = static_cast<s32>(static_cast<s64>(imm_offset_value.U64())); + const IR::U64 iadd_addr{addr_inst->Arg(0)}; + if (iadd_addr.IsImmediate()) { + return std::nullopt; + } + addr_inst = iadd_addr.InstRecursive(); + } + // With IAdd64 handled, now PackUint2x32 is expected + if (addr_inst->GetOpcode() == IR::Opcode::PackUint2x32) { + // PackUint2x32 is expected to be generated from a vector + const IR::Value vector{addr_inst->Arg(0)}; + if (vector.IsImmediate()) { + return std::nullopt; + } + addr_inst = vector.InstRecursive(); + } + // The vector is expected to be a CompositeConstructU32x2 + if (addr_inst->GetOpcode() != IR::Opcode::CompositeConstructU32x2) { + return std::nullopt; + } + // Grab the first argument from the CompositeConstructU32x2, this is the low address. 
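+    // Illustrative shape of the address chain walked above (value names are
+    // hypothetical); the IAdd64/PackUint2x32 wrappers are optional:
+    //   %vec  = CompositeConstructU32x2 %lo, %hi
+    //   %addr = IAdd64 (PackUint2x32 %vec), #imm
+    // yielding LowAddrInfo{.value = %lo, .imm_offset = imm}.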
+ return LowAddrInfo{ + .value{IR::U32{addr_inst->Arg(0)}}, + .imm_offset = imm_offset, + }; +} + +/// Tries to track the storage buffer address used by a global memory instruction +std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) { + const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> { + if (inst->GetOpcode() != IR::Opcode::GetCbufU32) { + return std::nullopt; + } + const IR::Value index{inst->Arg(0)}; + const IR::Value offset{inst->Arg(1)}; + if (!index.IsImmediate()) { + // Definitely not a storage buffer if it's read from a + // non-immediate index + return std::nullopt; + } + if (!offset.IsImmediate()) { + // TODO: Support SSBO arrays + return std::nullopt; + } + const StorageBufferAddr storage_buffer{ + .index = index.U32(), + .offset = offset.U32(), + }; + if (!Common::IsAligned(storage_buffer.offset, 16)) { + // The SSBO pointer has to be aligned + return std::nullopt; + } + if (bias && !MeetsBias(storage_buffer, *bias)) { + // We have to blacklist some addresses in case we wrongly + // point to them + return std::nullopt; + } + return storage_buffer; + }}; + return BreadthFirstSearch(value, pred); +} + +/// Collects the storage buffer used by a global memory instruction and the instruction itself +void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) { + // NVN puts storage buffers in a specific range, we have to bias towards these addresses to + // avoid getting false positives + static constexpr Bias nvn_bias{ + .index = 0, + .offset_begin = 0x110, + .offset_end = 0x610, + }; + // Track the low address of the instruction + const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)}; + if (!low_addr_info) { + // Failed to track the low address, use NVN fallbacks + return; + } + // First try to find storage buffers in the NVN address + const IR::U32 low_addr{low_addr_info->value}; + std::optional<StorageBufferAddr> storage_buffer{Track(low_addr, &nvn_bias)}; + if (!storage_buffer) { + // If it fails, track without a bias + storage_buffer = Track(low_addr, nullptr); + if (!storage_buffer) { + // If that also fails, use NVN fallbacks + return; + } + } + // Collect storage buffer and the instruction + if (IsGlobalMemoryWrite(inst)) { + info.writes.insert(*storage_buffer); + } + info.set.insert(*storage_buffer); + info.to_replace.push_back(StorageInst{ + .storage_buffer{*storage_buffer}, + .inst = &inst, + .block = &block, + }); +} + +/// Returns the offset in indices (not bytes) for an equivalent storage instruction +IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + IR::U32 offset; + if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) { + offset = low_addr->value; + if (low_addr->imm_offset != 0) { + offset = ir.IAdd(offset, ir.Imm32(low_addr->imm_offset)); + } + } else { + offset = ir.UConvert(32, IR::U64{inst.Arg(0)}); + } + // Subtract the least significant 32 bits from the guest offset. The result is the storage + // buffer offset in bytes. 
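+    // e.g. (hypothetical values) if the constant buffer word holds a buffer
+    // address with low word 0x00120000 and the tracked low address resolves to
+    // 0x00120040, the resulting storage buffer offset is 0x40 bytes.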
+ const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))}; + return ir.ISub(offset, low_cbuf); +} + +/// Replace a global memory load instruction with its storage buffer equivalent +void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, + const IR::U32& offset) { + const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())}; + const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; + const IR::Value value{&*block.PrependNewInst(it, new_opcode, {storage_index, offset})}; + inst.ReplaceUsesWith(value); +} + +/// Replace a global memory write instruction with its storage buffer equivalent +void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, + const IR::U32& offset) { + const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())}; + const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; + block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)}); + inst.Invalidate(); +} + +/// Replace an atomic operation on global memory instruction with its storage buffer equivalent +void ReplaceAtomic(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, + const IR::U32& offset) { + const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())}; + const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; + const IR::Value value{ + &*block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)})}; + inst.ReplaceUsesWith(value); +} + +/// Replace a global memory instruction with its storage buffer equivalent +void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, + const IR::U32& offset) { + switch (inst.GetOpcode()) { + case IR::Opcode::LoadGlobalS8: + case IR::Opcode::LoadGlobalU8: + case IR::Opcode::LoadGlobalS16: + case IR::Opcode::LoadGlobalU16: + case IR::Opcode::LoadGlobal32: + case IR::Opcode::LoadGlobal64: + case IR::Opcode::LoadGlobal128: + return ReplaceLoad(block, inst, storage_index, offset); + case IR::Opcode::WriteGlobalS8: + case IR::Opcode::WriteGlobalU8: + case IR::Opcode::WriteGlobalS16: + case IR::Opcode::WriteGlobalU16: + case IR::Opcode::WriteGlobal32: + case IR::Opcode::WriteGlobal64: + case IR::Opcode::WriteGlobal128: + return ReplaceWrite(block, inst, storage_index, offset); + case IR::Opcode::GlobalAtomicIAdd32: + case IR::Opcode::GlobalAtomicSMin32: + case IR::Opcode::GlobalAtomicUMin32: + case IR::Opcode::GlobalAtomicSMax32: + case IR::Opcode::GlobalAtomicUMax32: + case IR::Opcode::GlobalAtomicInc32: + case IR::Opcode::GlobalAtomicDec32: + case IR::Opcode::GlobalAtomicAnd32: + case IR::Opcode::GlobalAtomicOr32: + case IR::Opcode::GlobalAtomicXor32: + case IR::Opcode::GlobalAtomicExchange32: + case IR::Opcode::GlobalAtomicIAdd64: + case IR::Opcode::GlobalAtomicSMin64: + case IR::Opcode::GlobalAtomicUMin64: + case IR::Opcode::GlobalAtomicSMax64: + case IR::Opcode::GlobalAtomicUMax64: + case IR::Opcode::GlobalAtomicAnd64: + case IR::Opcode::GlobalAtomicOr64: + case IR::Opcode::GlobalAtomicXor64: + case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::GlobalAtomicAddF32: + case IR::Opcode::GlobalAtomicAddF16x2: + case IR::Opcode::GlobalAtomicAddF32x2: + case IR::Opcode::GlobalAtomicMinF16x2: + case IR::Opcode::GlobalAtomicMinF32x2: + case IR::Opcode::GlobalAtomicMaxF16x2: + case IR::Opcode::GlobalAtomicMaxF32x2: + return ReplaceAtomic(block, inst, storage_index, offset); + default: + throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode()); + } +} +} // Anonymous namespace + +void 
GlobalMemoryToStorageBufferPass(IR::Program& program) { + StorageInfo info; + for (IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (!IsGlobalMemory(inst)) { + continue; + } + CollectStorageBuffers(*block, inst, info); + } + } + for (const StorageBufferAddr& storage_buffer : info.set) { + program.info.storage_buffers_descriptors.push_back({ + .cbuf_index = storage_buffer.index, + .cbuf_offset = storage_buffer.offset, + .count = 1, + .is_written = info.writes.contains(storage_buffer), + }); + } + for (const StorageInst& storage_inst : info.to_replace) { + const StorageBufferAddr storage_buffer{storage_inst.storage_buffer}; + const auto it{info.set.find(storage_inst.storage_buffer)}; + const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}}; + IR::Block* const block{storage_inst.block}; + IR::Inst* const inst{storage_inst.inst}; + const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)}; + Replace(*block, *inst, index, offset); + } +} + +template <typename Descriptors, typename Descriptor, typename Func> +static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) { + // TODO: Handle arrays + const auto it{std::ranges::find_if(descriptors, pred)}; + if (it != descriptors.end()) { + return static_cast<u32>(std::distance(descriptors.begin(), it)); + } + descriptors.push_back(desc); + return static_cast<u32>(descriptors.size()) - 1; +} + +void JoinStorageInfo(Info& base, Info& source) { + auto& descriptors = base.storage_buffers_descriptors; + for (auto& desc : source.storage_buffers_descriptors) { + auto it{std::ranges::find_if(descriptors, [&desc](const auto& existing) { + return desc.cbuf_index == existing.cbuf_index && + desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count; + })}; + if (it != descriptors.end()) { + it->is_written |= desc.is_written; + continue; + } + descriptors.push_back(desc); + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp new file mode 100644 index 000000000..e9b55f835 --- /dev/null +++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp @@ -0,0 +1,38 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
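+
+// Resolves every argument through its chain of Identity instructions and then
+// erases the Identity/Void instructions themselves, invalidating them once the
+// whole program has been rewritten.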
+ +#include <vector> + +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { + +void IdentityRemovalPass(IR::Program& program) { + std::vector<IR::Inst*> to_invalidate; + for (IR::Block* const block : program.blocks) { + for (auto inst = block->begin(); inst != block->end();) { + const size_t num_args{inst->NumArgs()}; + for (size_t i = 0; i < num_args; ++i) { + IR::Value arg; + while ((arg = inst->Arg(i)).IsIdentity()) { + inst->SetArg(i, arg.Inst()->Arg(0)); + } + } + if (inst->GetOpcode() == IR::Opcode::Identity || + inst->GetOpcode() == IR::Opcode::Void) { + to_invalidate.push_back(&*inst); + inst = block->Instructions().erase(inst); + } else { + ++inst; + } + } + } + for (IR::Inst* const inst : to_invalidate) { + inst->Invalidate(); + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp new file mode 100644 index 000000000..773e1f961 --- /dev/null +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -0,0 +1,143 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> + +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { +namespace { +IR::Opcode Replace(IR::Opcode op) { + switch (op) { + case IR::Opcode::FPAbs16: + return IR::Opcode::FPAbs32; + case IR::Opcode::FPAdd16: + return IR::Opcode::FPAdd32; + case IR::Opcode::FPCeil16: + return IR::Opcode::FPCeil32; + case IR::Opcode::FPFloor16: + return IR::Opcode::FPFloor32; + case IR::Opcode::FPFma16: + return IR::Opcode::FPFma32; + case IR::Opcode::FPMul16: + return IR::Opcode::FPMul32; + case IR::Opcode::FPNeg16: + return IR::Opcode::FPNeg32; + case IR::Opcode::FPRoundEven16: + return IR::Opcode::FPRoundEven32; + case IR::Opcode::FPSaturate16: + return IR::Opcode::FPSaturate32; + case IR::Opcode::FPClamp16: + return IR::Opcode::FPClamp32; + case IR::Opcode::FPTrunc16: + return IR::Opcode::FPTrunc32; + case IR::Opcode::CompositeConstructF16x2: + return IR::Opcode::CompositeConstructF32x2; + case IR::Opcode::CompositeConstructF16x3: + return IR::Opcode::CompositeConstructF32x3; + case IR::Opcode::CompositeConstructF16x4: + return IR::Opcode::CompositeConstructF32x4; + case IR::Opcode::CompositeExtractF16x2: + return IR::Opcode::CompositeExtractF32x2; + case IR::Opcode::CompositeExtractF16x3: + return IR::Opcode::CompositeExtractF32x3; + case IR::Opcode::CompositeExtractF16x4: + return IR::Opcode::CompositeExtractF32x4; + case IR::Opcode::CompositeInsertF16x2: + return IR::Opcode::CompositeInsertF32x2; + case IR::Opcode::CompositeInsertF16x3: + return IR::Opcode::CompositeInsertF32x3; + case IR::Opcode::CompositeInsertF16x4: + return IR::Opcode::CompositeInsertF32x4; + case IR::Opcode::FPOrdEqual16: + return IR::Opcode::FPOrdEqual32; + case IR::Opcode::FPUnordEqual16: + return IR::Opcode::FPUnordEqual32; + case IR::Opcode::FPOrdNotEqual16: + return IR::Opcode::FPOrdNotEqual32; + case IR::Opcode::FPUnordNotEqual16: + return IR::Opcode::FPUnordNotEqual32; + case IR::Opcode::FPOrdLessThan16: + return IR::Opcode::FPOrdLessThan32; + case IR::Opcode::FPUnordLessThan16: + return IR::Opcode::FPUnordLessThan32; + case IR::Opcode::FPOrdGreaterThan16: + return 
IR::Opcode::FPOrdGreaterThan32; + case IR::Opcode::FPUnordGreaterThan16: + return IR::Opcode::FPUnordGreaterThan32; + case IR::Opcode::FPOrdLessThanEqual16: + return IR::Opcode::FPOrdLessThanEqual32; + case IR::Opcode::FPUnordLessThanEqual16: + return IR::Opcode::FPUnordLessThanEqual32; + case IR::Opcode::FPOrdGreaterThanEqual16: + return IR::Opcode::FPOrdGreaterThanEqual32; + case IR::Opcode::FPUnordGreaterThanEqual16: + return IR::Opcode::FPUnordGreaterThanEqual32; + case IR::Opcode::FPIsNan16: + return IR::Opcode::FPIsNan32; + case IR::Opcode::ConvertS16F16: + return IR::Opcode::ConvertS16F32; + case IR::Opcode::ConvertS32F16: + return IR::Opcode::ConvertS32F32; + case IR::Opcode::ConvertS64F16: + return IR::Opcode::ConvertS64F32; + case IR::Opcode::ConvertU16F16: + return IR::Opcode::ConvertU16F32; + case IR::Opcode::ConvertU32F16: + return IR::Opcode::ConvertU32F32; + case IR::Opcode::ConvertU64F16: + return IR::Opcode::ConvertU64F32; + case IR::Opcode::PackFloat2x16: + return IR::Opcode::PackHalf2x16; + case IR::Opcode::UnpackFloat2x16: + return IR::Opcode::UnpackHalf2x16; + case IR::Opcode::ConvertF32F16: + return IR::Opcode::Identity; + case IR::Opcode::ConvertF16F32: + return IR::Opcode::Identity; + case IR::Opcode::ConvertF16S8: + return IR::Opcode::ConvertF32S8; + case IR::Opcode::ConvertF16S16: + return IR::Opcode::ConvertF32S16; + case IR::Opcode::ConvertF16S32: + return IR::Opcode::ConvertF32S32; + case IR::Opcode::ConvertF16S64: + return IR::Opcode::ConvertF32S64; + case IR::Opcode::ConvertF16U8: + return IR::Opcode::ConvertF32U8; + case IR::Opcode::ConvertF16U16: + return IR::Opcode::ConvertF32U16; + case IR::Opcode::ConvertF16U32: + return IR::Opcode::ConvertF32U32; + case IR::Opcode::ConvertF16U64: + return IR::Opcode::ConvertF32U64; + case IR::Opcode::GlobalAtomicAddF16x2: + return IR::Opcode::GlobalAtomicAddF32x2; + case IR::Opcode::StorageAtomicAddF16x2: + return IR::Opcode::StorageAtomicAddF32x2; + case IR::Opcode::GlobalAtomicMinF16x2: + return IR::Opcode::GlobalAtomicMinF32x2; + case IR::Opcode::StorageAtomicMinF16x2: + return IR::Opcode::StorageAtomicMinF32x2; + case IR::Opcode::GlobalAtomicMaxF16x2: + return IR::Opcode::GlobalAtomicMaxF32x2; + case IR::Opcode::StorageAtomicMaxF16x2: + return IR::Opcode::StorageAtomicMaxF32x2; + default: + return op; + } +} +} // Anonymous namespace + +void LowerFp16ToFp32(IR::Program& program) { + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + inst.ReplaceOpcode(Replace(inst.GetOpcode())); + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp new file mode 100644 index 000000000..e80d3d1d9 --- /dev/null +++ b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp @@ -0,0 +1,218 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
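+
+// Lowers 64-bit integer operations into pairs of 32-bit operations, e.g.
+// (sketch) IAdd64 a, b becomes:
+//   lo = IAdd32 a.lo, b.lo
+//   hi = IAdd32 (IAdd32 a.hi, b.hi), carry(lo)
+// PackUint2x32 and UnpackUint2x32 are turned into identities so the 32-bit
+// halves are consumed directly.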
+ +#include <utility> + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { +namespace { +std::pair<IR::U32, IR::U32> Unpack(IR::IREmitter& ir, const IR::Value& packed) { + if (packed.IsImmediate()) { + const u64 value{packed.U64()}; + return { + ir.Imm32(static_cast<u32>(value)), + ir.Imm32(static_cast<u32>(value >> 32)), + }; + } else { + return std::pair<IR::U32, IR::U32>{ + ir.CompositeExtract(packed, 0u), + ir.CompositeExtract(packed, 1u), + }; + } +} + +void IAdd64To32(IR::Block& block, IR::Inst& inst) { + if (inst.HasAssociatedPseudoOperation()) { + throw NotImplementedException("IAdd64 emulation with pseudo instructions"); + } + IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); + const auto [a_lo, a_hi]{Unpack(ir, inst.Arg(0))}; + const auto [b_lo, b_hi]{Unpack(ir, inst.Arg(1))}; + + const IR::U32 ret_lo{ir.IAdd(a_lo, b_lo)}; + const IR::U32 carry{ir.Select(ir.GetCarryFromOp(ret_lo), ir.Imm32(1u), ir.Imm32(0u))}; + + const IR::U32 ret_hi{ir.IAdd(ir.IAdd(a_hi, b_hi), carry)}; + inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi)); +} + +void ISub64To32(IR::Block& block, IR::Inst& inst) { + if (inst.HasAssociatedPseudoOperation()) { + throw NotImplementedException("ISub64 emulation with pseudo instructions"); + } + IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); + const auto [a_lo, a_hi]{Unpack(ir, inst.Arg(0))}; + const auto [b_lo, b_hi]{Unpack(ir, inst.Arg(1))}; + + const IR::U32 ret_lo{ir.ISub(a_lo, b_lo)}; + const IR::U1 underflow{ir.IGreaterThan(ret_lo, a_lo, false)}; + const IR::U32 underflow_bit{ir.Select(underflow, ir.Imm32(1u), ir.Imm32(0u))}; + + const IR::U32 ret_hi{ir.ISub(ir.ISub(a_hi, b_hi), underflow_bit)}; + inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi)); +} + +void INeg64To32(IR::Block& block, IR::Inst& inst) { + if (inst.HasAssociatedPseudoOperation()) { + throw NotImplementedException("INeg64 emulation with pseudo instructions"); + } + IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); + auto [lo, hi]{Unpack(ir, inst.Arg(0))}; + lo = ir.BitwiseNot(lo); + hi = ir.BitwiseNot(hi); + + lo = ir.IAdd(lo, ir.Imm32(1)); + + const IR::U32 carry{ir.Select(ir.GetCarryFromOp(lo), ir.Imm32(1u), ir.Imm32(0u))}; + hi = ir.IAdd(hi, carry); + + inst.ReplaceUsesWith(ir.CompositeConstruct(lo, hi)); +} + +void ShiftLeftLogical64To32(IR::Block& block, IR::Inst& inst) { + if (inst.HasAssociatedPseudoOperation()) { + throw NotImplementedException("ShiftLeftLogical64 emulation with pseudo instructions"); + } + IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); + const auto [lo, hi]{Unpack(ir, inst.Arg(0))}; + const IR::U32 shift{inst.Arg(1)}; + + const IR::U32 shifted_lo{ir.ShiftLeftLogical(lo, shift)}; + const IR::U32 shifted_hi{ir.ShiftLeftLogical(hi, shift)}; + + const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))}; + const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)}; + const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))}; + + const IR::U32 long_ret_lo{ir.Imm32(0)}; + const IR::U32 long_ret_hi{ir.ShiftLeftLogical(lo, inv_shift)}; + + const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)}; + const IR::U32 lo_extract{ir.BitFieldExtract(lo, shift_complement, shift, 
false)}; + const IR::U32 short_ret_lo{shifted_lo}; + const IR::U32 short_ret_hi{ir.BitwiseOr(shifted_hi, lo_extract)}; + + const IR::U32 zero_ret_lo{lo}; + const IR::U32 zero_ret_hi{hi}; + + const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)}; + const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)}; + + const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)}; + const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)}; + inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi)); +} + +void ShiftRightLogical64To32(IR::Block& block, IR::Inst& inst) { + if (inst.HasAssociatedPseudoOperation()) { + throw NotImplementedException("ShiftRightLogical64 emulation with pseudo instructions"); + } + IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); + const auto [lo, hi]{Unpack(ir, inst.Arg(0))}; + const IR::U32 shift{inst.Arg(1)}; + + const IR::U32 shifted_lo{ir.ShiftRightLogical(lo, shift)}; + const IR::U32 shifted_hi{ir.ShiftRightLogical(hi, shift)}; + + const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))}; + const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)}; + const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))}; + + const IR::U32 long_ret_hi{ir.Imm32(0)}; + const IR::U32 long_ret_lo{ir.ShiftRightLogical(hi, inv_shift)}; + + const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)}; + const IR::U32 short_hi_extract{ir.BitFieldExtract(hi, ir.Imm32(0), shift)}; + const IR::U32 short_ret_hi{shifted_hi}; + const IR::U32 short_ret_lo{ + ir.BitFieldInsert(shifted_lo, short_hi_extract, shift_complement, shift)}; + + const IR::U32 zero_ret_lo{lo}; + const IR::U32 zero_ret_hi{hi}; + + const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)}; + const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)}; + + const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)}; + const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)}; + inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi)); +} + +void ShiftRightArithmetic64To32(IR::Block& block, IR::Inst& inst) { + if (inst.HasAssociatedPseudoOperation()) { + throw NotImplementedException("ShiftRightArithmetic64 emulation with pseudo instructions"); + } + IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); + const auto [lo, hi]{Unpack(ir, inst.Arg(0))}; + const IR::U32 shift{inst.Arg(1)}; + + const IR::U32 shifted_lo{ir.ShiftRightLogical(lo, shift)}; + const IR::U32 shifted_hi{ir.ShiftRightArithmetic(hi, shift)}; + + const IR::U32 sign_extension{ir.ShiftRightArithmetic(hi, ir.Imm32(31))}; + + const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))}; + const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)}; + const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))}; + + const IR::U32 long_ret_hi{sign_extension}; + const IR::U32 long_ret_lo{ir.ShiftRightArithmetic(hi, inv_shift)}; + + const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)}; + const IR::U32 short_hi_extract(ir.BitFieldExtract(hi, ir.Imm32(0), shift)); + const IR::U32 short_ret_hi{shifted_hi}; + const IR::U32 short_ret_lo{ + ir.BitFieldInsert(shifted_lo, short_hi_extract, shift_complement, shift)}; + + const IR::U32 zero_ret_lo{lo}; + const IR::U32 zero_ret_hi{hi}; + + const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)}; + const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)}; + + const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)}; + const IR::U32 
ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)}; + inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi)); +} + +void Lower(IR::Block& block, IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::PackUint2x32: + case IR::Opcode::UnpackUint2x32: + return inst.ReplaceOpcode(IR::Opcode::Identity); + case IR::Opcode::IAdd64: + return IAdd64To32(block, inst); + case IR::Opcode::ISub64: + return ISub64To32(block, inst); + case IR::Opcode::INeg64: + return INeg64To32(block, inst); + case IR::Opcode::ShiftLeftLogical64: + return ShiftLeftLogical64To32(block, inst); + case IR::Opcode::ShiftRightLogical64: + return ShiftRightLogical64To32(block, inst); + case IR::Opcode::ShiftRightArithmetic64: + return ShiftRightArithmetic64To32(block, inst); + default: + break; + } +} +} // Anonymous namespace + +void LowerInt64ToInt32(IR::Program& program) { + const auto end{program.post_order_blocks.rend()}; + for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) { + IR::Block* const block{*it}; + for (IR::Inst& inst : block->Instructions()) { + Lower(*block, inst); + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h new file mode 100644 index 000000000..2f89b1ea0 --- /dev/null +++ b/src/shader_recompiler/ir_opt/passes.h @@ -0,0 +1,32 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <span> + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/program.h" + +namespace Shader::Optimization { + +void CollectShaderInfoPass(Environment& env, IR::Program& program); +void ConstantPropagationPass(IR::Program& program); +void DeadCodeEliminationPass(IR::Program& program); +void GlobalMemoryToStorageBufferPass(IR::Program& program); +void IdentityRemovalPass(IR::Program& program); +void LowerFp16ToFp32(IR::Program& program); +void LowerInt64ToInt32(IR::Program& program); +void SsaRewritePass(IR::Program& program); +void TexturePass(Environment& env, IR::Program& program); +void VerificationPass(const IR::Program& program); + +// Dual Vertex +void VertexATransformPass(IR::Program& program); +void VertexBTransformPass(IR::Program& program); +void JoinTextureInfo(Info& base, Info& source); +void JoinStorageInfo(Info& base, Info& source); + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp new file mode 100644 index 000000000..53145fb5e --- /dev/null +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -0,0 +1,383 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// This file implements the SSA rewriting algorithm proposed in +// +// Simple and Efficient Construction of Static Single Assignment Form. +// Braun M., Buchwald S., Hack S., Leiba R., Mallon C., Zwinkau A. (2013) +// In: Jhala R., De Bosschere K. (eds) +// Compiler Construction. CC 2013. +// Lecture Notes in Computer Science, vol 7791. 
+// Springer, Berlin, Heidelberg +// +// https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6 +// + +#include <span> +#include <variant> +#include <vector> + +#include <boost/container/flat_map.hpp> +#include <boost/container/flat_set.hpp> + +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/opcodes.h" +#include "shader_recompiler/frontend/ir/pred.h" +#include "shader_recompiler/frontend/ir/reg.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { +namespace { +struct FlagTag { + auto operator<=>(const FlagTag&) const noexcept = default; +}; +struct ZeroFlagTag : FlagTag {}; +struct SignFlagTag : FlagTag {}; +struct CarryFlagTag : FlagTag {}; +struct OverflowFlagTag : FlagTag {}; + +struct GotoVariable : FlagTag { + GotoVariable() = default; + explicit GotoVariable(u32 index_) : index{index_} {} + + auto operator<=>(const GotoVariable&) const noexcept = default; + + u32 index; +}; + +struct IndirectBranchVariable { + auto operator<=>(const IndirectBranchVariable&) const noexcept = default; +}; + +using Variant = std::variant<IR::Reg, IR::Pred, ZeroFlagTag, SignFlagTag, CarryFlagTag, + OverflowFlagTag, GotoVariable, IndirectBranchVariable>; +using ValueMap = boost::container::flat_map<IR::Block*, IR::Value>; + +struct DefTable { + const IR::Value& Def(IR::Block* block, IR::Reg variable) { + return block->SsaRegValue(variable); + } + void SetDef(IR::Block* block, IR::Reg variable, const IR::Value& value) { + block->SetSsaRegValue(variable, value); + } + + const IR::Value& Def(IR::Block* block, IR::Pred variable) { + return preds[IR::PredIndex(variable)][block]; + } + void SetDef(IR::Block* block, IR::Pred variable, const IR::Value& value) { + preds[IR::PredIndex(variable)].insert_or_assign(block, value); + } + + const IR::Value& Def(IR::Block* block, GotoVariable variable) { + return goto_vars[variable.index][block]; + } + void SetDef(IR::Block* block, GotoVariable variable, const IR::Value& value) { + goto_vars[variable.index].insert_or_assign(block, value); + } + + const IR::Value& Def(IR::Block* block, IndirectBranchVariable) { + return indirect_branch_var[block]; + } + void SetDef(IR::Block* block, IndirectBranchVariable, const IR::Value& value) { + indirect_branch_var.insert_or_assign(block, value); + } + + const IR::Value& Def(IR::Block* block, ZeroFlagTag) { + return zero_flag[block]; + } + void SetDef(IR::Block* block, ZeroFlagTag, const IR::Value& value) { + zero_flag.insert_or_assign(block, value); + } + + const IR::Value& Def(IR::Block* block, SignFlagTag) { + return sign_flag[block]; + } + void SetDef(IR::Block* block, SignFlagTag, const IR::Value& value) { + sign_flag.insert_or_assign(block, value); + } + + const IR::Value& Def(IR::Block* block, CarryFlagTag) { + return carry_flag[block]; + } + void SetDef(IR::Block* block, CarryFlagTag, const IR::Value& value) { + carry_flag.insert_or_assign(block, value); + } + + const IR::Value& Def(IR::Block* block, OverflowFlagTag) { + return overflow_flag[block]; + } + void SetDef(IR::Block* block, OverflowFlagTag, const IR::Value& value) { + overflow_flag.insert_or_assign(block, value); + } + + std::array<ValueMap, IR::NUM_USER_PREDS> preds; + boost::container::flat_map<u32, ValueMap> goto_vars; + ValueMap indirect_branch_var; + ValueMap zero_flag; + ValueMap sign_flag; + ValueMap carry_flag; + ValueMap overflow_flag; +}; + +IR::Opcode UndefOpcode(IR::Reg) noexcept { + return IR::Opcode::UndefU32; +} + 
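The Braun et al. paper cited at the top of this file is what the pass implements: definitions are recorded per variable and per block through DefTable, reads recurse into predecessors, and any phi whose operands all resolve to a single value (or back to the phi itself) is folded away by TryRemoveTrivialPhi further down. A small standalone sketch of just that triviality test, with plain integer value IDs standing in for IR::Value:

#include <cassert>
#include <optional>
#include <vector>

// Toy model: a phi is identified by its own id and carries operand value ids.
struct Phi {
    int id;
    std::vector<int> operands;
};

// Returns the unique operand if the phi is trivial, or nullopt when it
// really merges two or more distinct values and must be kept.
std::optional<int> TryGetTrivialValue(const Phi& phi) {
    std::optional<int> same;
    for (const int op : phi.operands) {
        if (op == phi.id || (same && op == *same)) {
            continue; // self-reference or already-seen value
        }
        if (same) {
            return std::nullopt; // second distinct value: not trivial
        }
        same = op;
    }
    return same; // empty for an operandless or unreachable phi
}

int main() {
    assert(*TryGetTrivialValue({.id = 10, .operands = {7, 10, 7}}) == 7);
    assert(!TryGetTrivialValue({.id = 10, .operands = {7, 8}}).has_value());
}

In the pass itself an empty result leads to an undef instruction being materialised, as TryRemoveTrivialPhi shows below.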
+IR::Opcode UndefOpcode(IR::Pred) noexcept { + return IR::Opcode::UndefU1; +} + +IR::Opcode UndefOpcode(const FlagTag&) noexcept { + return IR::Opcode::UndefU1; +} + +IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept { + return IR::Opcode::UndefU32; +} + +enum class Status { + Start, + SetValue, + PreparePhiArgument, + PushPhiArgument, +}; + +template <typename Type> +struct ReadState { + ReadState(IR::Block* block_) : block{block_} {} + ReadState() = default; + + IR::Block* block{}; + IR::Value result{}; + IR::Inst* phi{}; + IR::Block* const* pred_it{}; + IR::Block* const* pred_end{}; + Status pc{Status::Start}; +}; + +class Pass { +public: + template <typename Type> + void WriteVariable(Type variable, IR::Block* block, const IR::Value& value) { + current_def.SetDef(block, variable, value); + } + + template <typename Type> + IR::Value ReadVariable(Type variable, IR::Block* root_block) { + boost::container::small_vector<ReadState<Type>, 64> stack{ + ReadState<Type>(nullptr), + ReadState<Type>(root_block), + }; + const auto prepare_phi_operand{[&] { + if (stack.back().pred_it == stack.back().pred_end) { + IR::Inst* const phi{stack.back().phi}; + IR::Block* const block{stack.back().block}; + const IR::Value result{TryRemoveTrivialPhi(*phi, block, UndefOpcode(variable))}; + stack.pop_back(); + stack.back().result = result; + WriteVariable(variable, block, result); + } else { + IR::Block* const imm_pred{*stack.back().pred_it}; + stack.back().pc = Status::PushPhiArgument; + stack.emplace_back(imm_pred); + } + }}; + do { + IR::Block* const block{stack.back().block}; + switch (stack.back().pc) { + case Status::Start: { + if (const IR::Value& def = current_def.Def(block, variable); !def.IsEmpty()) { + stack.back().result = def; + } else if (!block->IsSsaSealed()) { + // Incomplete CFG + IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; + phi->SetFlags(IR::TypeOf(UndefOpcode(variable))); + + incomplete_phis[block].insert_or_assign(variable, phi); + stack.back().result = IR::Value{&*phi}; + } else if (const std::span imm_preds = block->ImmPredecessors(); + imm_preds.size() == 1) { + // Optimize the common case of one predecessor: no phi needed + stack.back().pc = Status::SetValue; + stack.emplace_back(imm_preds.front()); + break; + } else { + // Break potential cycles with operandless phi + IR::Inst* const phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; + phi->SetFlags(IR::TypeOf(UndefOpcode(variable))); + + WriteVariable(variable, block, IR::Value{phi}); + + stack.back().phi = phi; + stack.back().pred_it = imm_preds.data(); + stack.back().pred_end = imm_preds.data() + imm_preds.size(); + prepare_phi_operand(); + break; + } + } + [[fallthrough]]; + case Status::SetValue: { + const IR::Value result{stack.back().result}; + WriteVariable(variable, block, result); + stack.pop_back(); + stack.back().result = result; + break; + } + case Status::PushPhiArgument: { + IR::Inst* const phi{stack.back().phi}; + phi->AddPhiOperand(*stack.back().pred_it, stack.back().result); + ++stack.back().pred_it; + } + [[fallthrough]]; + case Status::PreparePhiArgument: + prepare_phi_operand(); + break; + } + } while (stack.size() > 1); + return stack.back().result; + } + + void SealBlock(IR::Block* block) { + const auto it{incomplete_phis.find(block)}; + if (it != incomplete_phis.end()) { + for (auto& pair : it->second) { + auto& variant{pair.first}; + auto& phi{pair.second}; + std::visit([&](auto& variable) { AddPhiOperands(variable, *phi, block); }, variant); + } + } + 
block->SsaSeal(); + } + +private: + template <typename Type> + IR::Value AddPhiOperands(Type variable, IR::Inst& phi, IR::Block* block) { + for (IR::Block* const imm_pred : block->ImmPredecessors()) { + phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred)); + } + return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable)); + } + + IR::Value TryRemoveTrivialPhi(IR::Inst& phi, IR::Block* block, IR::Opcode undef_opcode) { + IR::Value same; + const size_t num_args{phi.NumArgs()}; + for (size_t arg_index = 0; arg_index < num_args; ++arg_index) { + const IR::Value& op{phi.Arg(arg_index)}; + if (op.Resolve() == same.Resolve() || op == IR::Value{&phi}) { + // Unique value or self-reference + continue; + } + if (!same.IsEmpty()) { + // The phi merges at least two values: not trivial + return IR::Value{&phi}; + } + same = op; + } + // Remove the phi node from the block, it will be reinserted + IR::Block::InstructionList& list{block->Instructions()}; + list.erase(IR::Block::InstructionList::s_iterator_to(phi)); + + // Find the first non-phi instruction and use it as an insertion point + IR::Block::iterator reinsert_point{std::ranges::find_if_not(list, IR::IsPhi)}; + if (same.IsEmpty()) { + // The phi is unreachable or in the start block + // Insert an undefined instruction and make it the phi node replacement + // The "phi" node reinsertion point is specified after this instruction + reinsert_point = block->PrependNewInst(reinsert_point, undef_opcode); + same = IR::Value{&*reinsert_point}; + ++reinsert_point; + } + // Reinsert the phi node and reroute all its uses to the "same" value + list.insert(reinsert_point, phi); + phi.ReplaceUsesWith(same); + // TODO: Try to recursively remove all phi users, which might have become trivial + return same; + } + + boost::container::flat_map<IR::Block*, boost::container::flat_map<Variant, IR::Inst*>> + incomplete_phis; + DefTable current_def; +}; + +void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::SetRegister: + if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { + pass.WriteVariable(reg, block, inst.Arg(1)); + } + break; + case IR::Opcode::SetPred: + if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) { + pass.WriteVariable(pred, block, inst.Arg(1)); + } + break; + case IR::Opcode::SetGotoVariable: + pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1)); + break; + case IR::Opcode::SetIndirectBranchVariable: + pass.WriteVariable(IndirectBranchVariable{}, block, inst.Arg(0)); + break; + case IR::Opcode::SetZFlag: + pass.WriteVariable(ZeroFlagTag{}, block, inst.Arg(0)); + break; + case IR::Opcode::SetSFlag: + pass.WriteVariable(SignFlagTag{}, block, inst.Arg(0)); + break; + case IR::Opcode::SetCFlag: + pass.WriteVariable(CarryFlagTag{}, block, inst.Arg(0)); + break; + case IR::Opcode::SetOFlag: + pass.WriteVariable(OverflowFlagTag{}, block, inst.Arg(0)); + break; + case IR::Opcode::GetRegister: + if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { + inst.ReplaceUsesWith(pass.ReadVariable(reg, block)); + } + break; + case IR::Opcode::GetPred: + if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) { + inst.ReplaceUsesWith(pass.ReadVariable(pred, block)); + } + break; + case IR::Opcode::GetGotoVariable: + inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block)); + break; + case IR::Opcode::GetIndirectBranchVariable: + inst.ReplaceUsesWith(pass.ReadVariable(IndirectBranchVariable{}, block)); + break; + 
case IR::Opcode::GetZFlag: + inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block)); + break; + case IR::Opcode::GetSFlag: + inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block)); + break; + case IR::Opcode::GetCFlag: + inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block)); + break; + case IR::Opcode::GetOFlag: + inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block)); + break; + default: + break; + } +} + +void VisitBlock(Pass& pass, IR::Block* block) { + for (IR::Inst& inst : block->Instructions()) { + VisitInst(pass, block, inst); + } + pass.SealBlock(block); +} +} // Anonymous namespace + +void SsaRewritePass(IR::Program& program) { + Pass pass; + const auto end{program.post_order_blocks.rend()}; + for (auto block = program.post_order_blocks.rbegin(); block != end; ++block) { + VisitBlock(pass, *block); + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp new file mode 100644 index 000000000..44ad10d43 --- /dev/null +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -0,0 +1,523 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <bit> +#include <optional> + +#include <boost/container/small_vector.hpp> + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/breadth_first_search.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/ir_opt/passes.h" +#include "shader_recompiler/shader_info.h" + +namespace Shader::Optimization { +namespace { +struct ConstBufferAddr { + u32 index; + u32 offset; + u32 secondary_index; + u32 secondary_offset; + IR::U32 dynamic_offset; + u32 count; + bool has_secondary; +}; + +struct TextureInst { + ConstBufferAddr cbuf; + IR::Inst* inst; + IR::Block* block; +}; + +using TextureInstVector = boost::container::small_vector<TextureInst, 24>; + +constexpr u32 DESCRIPTOR_SIZE = 8; +constexpr u32 DESCRIPTOR_SIZE_SHIFT = static_cast<u32>(std::countr_zero(DESCRIPTOR_SIZE)); + +IR::Opcode IndexedInstruction(const IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::BindlessImageSampleImplicitLod: + case IR::Opcode::BoundImageSampleImplicitLod: + return IR::Opcode::ImageSampleImplicitLod; + case IR::Opcode::BoundImageSampleExplicitLod: + case IR::Opcode::BindlessImageSampleExplicitLod: + return IR::Opcode::ImageSampleExplicitLod; + case IR::Opcode::BoundImageSampleDrefImplicitLod: + case IR::Opcode::BindlessImageSampleDrefImplicitLod: + return IR::Opcode::ImageSampleDrefImplicitLod; + case IR::Opcode::BoundImageSampleDrefExplicitLod: + case IR::Opcode::BindlessImageSampleDrefExplicitLod: + return IR::Opcode::ImageSampleDrefExplicitLod; + case IR::Opcode::BindlessImageGather: + case IR::Opcode::BoundImageGather: + return IR::Opcode::ImageGather; + case IR::Opcode::BindlessImageGatherDref: + case IR::Opcode::BoundImageGatherDref: + return IR::Opcode::ImageGatherDref; + case IR::Opcode::BindlessImageFetch: + case IR::Opcode::BoundImageFetch: + return IR::Opcode::ImageFetch; + case IR::Opcode::BoundImageQueryDimensions: + case IR::Opcode::BindlessImageQueryDimensions: + return IR::Opcode::ImageQueryDimensions; + case IR::Opcode::BoundImageQueryLod: + case IR::Opcode::BindlessImageQueryLod: + return IR::Opcode::ImageQueryLod; + case IR::Opcode::BoundImageGradient: + case 
IR::Opcode::BindlessImageGradient: + return IR::Opcode::ImageGradient; + case IR::Opcode::BoundImageRead: + case IR::Opcode::BindlessImageRead: + return IR::Opcode::ImageRead; + case IR::Opcode::BoundImageWrite: + case IR::Opcode::BindlessImageWrite: + return IR::Opcode::ImageWrite; + case IR::Opcode::BoundImageAtomicIAdd32: + case IR::Opcode::BindlessImageAtomicIAdd32: + return IR::Opcode::ImageAtomicIAdd32; + case IR::Opcode::BoundImageAtomicSMin32: + case IR::Opcode::BindlessImageAtomicSMin32: + return IR::Opcode::ImageAtomicSMin32; + case IR::Opcode::BoundImageAtomicUMin32: + case IR::Opcode::BindlessImageAtomicUMin32: + return IR::Opcode::ImageAtomicUMin32; + case IR::Opcode::BoundImageAtomicSMax32: + case IR::Opcode::BindlessImageAtomicSMax32: + return IR::Opcode::ImageAtomicSMax32; + case IR::Opcode::BoundImageAtomicUMax32: + case IR::Opcode::BindlessImageAtomicUMax32: + return IR::Opcode::ImageAtomicUMax32; + case IR::Opcode::BoundImageAtomicInc32: + case IR::Opcode::BindlessImageAtomicInc32: + return IR::Opcode::ImageAtomicInc32; + case IR::Opcode::BoundImageAtomicDec32: + case IR::Opcode::BindlessImageAtomicDec32: + return IR::Opcode::ImageAtomicDec32; + case IR::Opcode::BoundImageAtomicAnd32: + case IR::Opcode::BindlessImageAtomicAnd32: + return IR::Opcode::ImageAtomicAnd32; + case IR::Opcode::BoundImageAtomicOr32: + case IR::Opcode::BindlessImageAtomicOr32: + return IR::Opcode::ImageAtomicOr32; + case IR::Opcode::BoundImageAtomicXor32: + case IR::Opcode::BindlessImageAtomicXor32: + return IR::Opcode::ImageAtomicXor32; + case IR::Opcode::BoundImageAtomicExchange32: + case IR::Opcode::BindlessImageAtomicExchange32: + return IR::Opcode::ImageAtomicExchange32; + default: + return IR::Opcode::Void; + } +} + +bool IsBindless(const IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::BindlessImageSampleImplicitLod: + case IR::Opcode::BindlessImageSampleExplicitLod: + case IR::Opcode::BindlessImageSampleDrefImplicitLod: + case IR::Opcode::BindlessImageSampleDrefExplicitLod: + case IR::Opcode::BindlessImageGather: + case IR::Opcode::BindlessImageGatherDref: + case IR::Opcode::BindlessImageFetch: + case IR::Opcode::BindlessImageQueryDimensions: + case IR::Opcode::BindlessImageQueryLod: + case IR::Opcode::BindlessImageGradient: + case IR::Opcode::BindlessImageRead: + case IR::Opcode::BindlessImageWrite: + case IR::Opcode::BindlessImageAtomicIAdd32: + case IR::Opcode::BindlessImageAtomicSMin32: + case IR::Opcode::BindlessImageAtomicUMin32: + case IR::Opcode::BindlessImageAtomicSMax32: + case IR::Opcode::BindlessImageAtomicUMax32: + case IR::Opcode::BindlessImageAtomicInc32: + case IR::Opcode::BindlessImageAtomicDec32: + case IR::Opcode::BindlessImageAtomicAnd32: + case IR::Opcode::BindlessImageAtomicOr32: + case IR::Opcode::BindlessImageAtomicXor32: + case IR::Opcode::BindlessImageAtomicExchange32: + return true; + case IR::Opcode::BoundImageSampleImplicitLod: + case IR::Opcode::BoundImageSampleExplicitLod: + case IR::Opcode::BoundImageSampleDrefImplicitLod: + case IR::Opcode::BoundImageSampleDrefExplicitLod: + case IR::Opcode::BoundImageGather: + case IR::Opcode::BoundImageGatherDref: + case IR::Opcode::BoundImageFetch: + case IR::Opcode::BoundImageQueryDimensions: + case IR::Opcode::BoundImageQueryLod: + case IR::Opcode::BoundImageGradient: + case IR::Opcode::BoundImageRead: + case IR::Opcode::BoundImageWrite: + case IR::Opcode::BoundImageAtomicIAdd32: + case IR::Opcode::BoundImageAtomicSMin32: + case IR::Opcode::BoundImageAtomicUMin32: + case 
IR::Opcode::BoundImageAtomicSMax32: + case IR::Opcode::BoundImageAtomicUMax32: + case IR::Opcode::BoundImageAtomicInc32: + case IR::Opcode::BoundImageAtomicDec32: + case IR::Opcode::BoundImageAtomicAnd32: + case IR::Opcode::BoundImageAtomicOr32: + case IR::Opcode::BoundImageAtomicXor32: + case IR::Opcode::BoundImageAtomicExchange32: + return false; + default: + throw InvalidArgument("Invalid opcode {}", inst.GetOpcode()); + } +} + +bool IsTextureInstruction(const IR::Inst& inst) { + return IndexedInstruction(inst) != IR::Opcode::Void; +} + +std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst); + +std::optional<ConstBufferAddr> Track(const IR::Value& value) { + return IR::BreadthFirstSearch(value, TryGetConstBuffer); +} + +std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst) { + switch (inst->GetOpcode()) { + default: + return std::nullopt; + case IR::Opcode::BitwiseOr32: { + std::optional lhs{Track(inst->Arg(0))}; + std::optional rhs{Track(inst->Arg(1))}; + if (!lhs || !rhs) { + return std::nullopt; + } + if (lhs->has_secondary || rhs->has_secondary) { + return std::nullopt; + } + if (lhs->count > 1 || rhs->count > 1) { + return std::nullopt; + } + if (lhs->index > rhs->index || lhs->offset > rhs->offset) { + std::swap(lhs, rhs); + } + return ConstBufferAddr{ + .index = lhs->index, + .offset = lhs->offset, + .secondary_index = rhs->index, + .secondary_offset = rhs->offset, + .dynamic_offset = {}, + .count = 1, + .has_secondary = true, + }; + } + case IR::Opcode::GetCbufU32x2: + case IR::Opcode::GetCbufU32: + break; + } + const IR::Value index{inst->Arg(0)}; + const IR::Value offset{inst->Arg(1)}; + if (!index.IsImmediate()) { + // Reading a bindless texture from variable indices is valid + // but not supported here at the moment + return std::nullopt; + } + if (offset.IsImmediate()) { + return ConstBufferAddr{ + .index = index.U32(), + .offset = offset.U32(), + .secondary_index = 0, + .secondary_offset = 0, + .dynamic_offset = {}, + .count = 1, + .has_secondary = false, + }; + } + IR::Inst* const offset_inst{offset.InstRecursive()}; + if (offset_inst->GetOpcode() != IR::Opcode::IAdd32) { + return std::nullopt; + } + u32 base_offset{}; + IR::U32 dynamic_offset; + if (offset_inst->Arg(0).IsImmediate()) { + base_offset = offset_inst->Arg(0).U32(); + dynamic_offset = IR::U32{offset_inst->Arg(1)}; + } else if (offset_inst->Arg(1).IsImmediate()) { + base_offset = offset_inst->Arg(1).U32(); + dynamic_offset = IR::U32{offset_inst->Arg(0)}; + } else { + return std::nullopt; + } + return ConstBufferAddr{ + .index = index.U32(), + .offset = base_offset, + .secondary_index = 0, + .secondary_offset = 0, + .dynamic_offset = dynamic_offset, + .count = 8, + .has_secondary = false, + }; +} + +TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { + ConstBufferAddr addr; + if (IsBindless(inst)) { + const std::optional<ConstBufferAddr> track_addr{Track(inst.Arg(0))}; + if (!track_addr) { + throw NotImplementedException("Failed to track bindless texture constant buffer"); + } + addr = *track_addr; + } else { + addr = ConstBufferAddr{ + .index = env.TextureBoundBuffer(), + .offset = inst.Arg(0).U32(), + .secondary_index = 0, + .secondary_offset = 0, + .dynamic_offset = {}, + .count = 1, + .has_secondary = false, + }; + } + return TextureInst{ + .cbuf = addr, + .inst = &inst, + .block = block, + }; +} + +TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) { + const u32 secondary_index{cbuf.has_secondary ? 
cbuf.secondary_index : cbuf.index}; + const u32 secondary_offset{cbuf.has_secondary ? cbuf.secondary_offset : cbuf.offset}; + const u32 lhs_raw{env.ReadCbufValue(cbuf.index, cbuf.offset)}; + const u32 rhs_raw{env.ReadCbufValue(secondary_index, secondary_offset)}; + return env.ReadTextureType(lhs_raw | rhs_raw); +} + +class Descriptors { +public: + explicit Descriptors(TextureBufferDescriptors& texture_buffer_descriptors_, + ImageBufferDescriptors& image_buffer_descriptors_, + TextureDescriptors& texture_descriptors_, + ImageDescriptors& image_descriptors_) + : texture_buffer_descriptors{texture_buffer_descriptors_}, + image_buffer_descriptors{image_buffer_descriptors_}, + texture_descriptors{texture_descriptors_}, image_descriptors{image_descriptors_} {} + + u32 Add(const TextureBufferDescriptor& desc) { + return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) { + return desc.cbuf_index == existing.cbuf_index && + desc.cbuf_offset == existing.cbuf_offset && + desc.secondary_cbuf_index == existing.secondary_cbuf_index && + desc.secondary_cbuf_offset == existing.secondary_cbuf_offset && + desc.count == existing.count && desc.size_shift == existing.size_shift && + desc.has_secondary == existing.has_secondary; + }); + } + + u32 Add(const ImageBufferDescriptor& desc) { + const u32 index{Add(image_buffer_descriptors, desc, [&desc](const auto& existing) { + return desc.format == existing.format && desc.cbuf_index == existing.cbuf_index && + desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count && + desc.size_shift == existing.size_shift; + })}; + image_buffer_descriptors[index].is_written |= desc.is_written; + image_buffer_descriptors[index].is_read |= desc.is_read; + return index; + } + + u32 Add(const TextureDescriptor& desc) { + return Add(texture_descriptors, desc, [&desc](const auto& existing) { + return desc.type == existing.type && desc.is_depth == existing.is_depth && + desc.has_secondary == existing.has_secondary && + desc.cbuf_index == existing.cbuf_index && + desc.cbuf_offset == existing.cbuf_offset && + desc.secondary_cbuf_index == existing.secondary_cbuf_index && + desc.secondary_cbuf_offset == existing.secondary_cbuf_offset && + desc.count == existing.count && desc.size_shift == existing.size_shift; + }); + } + + u32 Add(const ImageDescriptor& desc) { + const u32 index{Add(image_descriptors, desc, [&desc](const auto& existing) { + return desc.type == existing.type && desc.format == existing.format && + desc.cbuf_index == existing.cbuf_index && + desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count && + desc.size_shift == existing.size_shift; + })}; + image_descriptors[index].is_written |= desc.is_written; + image_descriptors[index].is_read |= desc.is_read; + return index; + } + +private: + template <typename Descriptors, typename Descriptor, typename Func> + static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) { + // TODO: Handle arrays + const auto it{std::ranges::find_if(descriptors, pred)}; + if (it != descriptors.end()) { + return static_cast<u32>(std::distance(descriptors.begin(), it)); + } + descriptors.push_back(desc); + return static_cast<u32>(descriptors.size()) - 1; + } + + TextureBufferDescriptors& texture_buffer_descriptors; + ImageBufferDescriptors& image_buffer_descriptors; + TextureDescriptors& texture_descriptors; + ImageDescriptors& image_descriptors; +}; +} // Anonymous namespace + +void TexturePass(Environment& env, IR::Program& program) { + TextureInstVector to_replace; + for 
(IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (!IsTextureInstruction(inst)) { + continue; + } + to_replace.push_back(MakeInst(env, block, inst)); + } + } + // Sort instructions to visit textures by constant buffer index, then by offset + std::ranges::sort(to_replace, [](const auto& lhs, const auto& rhs) { + return lhs.cbuf.offset < rhs.cbuf.offset; + }); + std::stable_sort(to_replace.begin(), to_replace.end(), [](const auto& lhs, const auto& rhs) { + return lhs.cbuf.index < rhs.cbuf.index; + }); + Descriptors descriptors{ + program.info.texture_buffer_descriptors, + program.info.image_buffer_descriptors, + program.info.texture_descriptors, + program.info.image_descriptors, + }; + for (TextureInst& texture_inst : to_replace) { + // TODO: Handle arrays + IR::Inst* const inst{texture_inst.inst}; + inst->ReplaceOpcode(IndexedInstruction(*inst)); + + const auto& cbuf{texture_inst.cbuf}; + auto flags{inst->Flags<IR::TextureInstInfo>()}; + switch (inst->GetOpcode()) { + case IR::Opcode::ImageQueryDimensions: + flags.type.Assign(ReadTextureType(env, cbuf)); + inst->SetFlags(flags); + break; + case IR::Opcode::ImageFetch: + if (flags.type != TextureType::Color1D) { + break; + } + if (ReadTextureType(env, cbuf) == TextureType::Buffer) { + // Replace with the bound texture type only when it's a texture buffer + // If the instruction is 1D and the bound type is 2D, don't change the code and let + // the rasterizer robustness handle it + // This happens on Fire Emblem: Three Houses + flags.type.Assign(TextureType::Buffer); + } + break; + default: + break; + } + u32 index; + switch (inst->GetOpcode()) { + case IR::Opcode::ImageRead: + case IR::Opcode::ImageAtomicIAdd32: + case IR::Opcode::ImageAtomicSMin32: + case IR::Opcode::ImageAtomicUMin32: + case IR::Opcode::ImageAtomicSMax32: + case IR::Opcode::ImageAtomicUMax32: + case IR::Opcode::ImageAtomicInc32: + case IR::Opcode::ImageAtomicDec32: + case IR::Opcode::ImageAtomicAnd32: + case IR::Opcode::ImageAtomicOr32: + case IR::Opcode::ImageAtomicXor32: + case IR::Opcode::ImageAtomicExchange32: + case IR::Opcode::ImageWrite: { + if (cbuf.has_secondary) { + throw NotImplementedException("Unexpected separate sampler"); + } + const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead}; + const bool is_read{inst->GetOpcode() != IR::Opcode::ImageWrite}; + if (flags.type == TextureType::Buffer) { + index = descriptors.Add(ImageBufferDescriptor{ + .format = flags.image_format, + .is_written = is_written, + .is_read = is_read, + .cbuf_index = cbuf.index, + .cbuf_offset = cbuf.offset, + .count = cbuf.count, + .size_shift = DESCRIPTOR_SIZE_SHIFT, + }); + } else { + index = descriptors.Add(ImageDescriptor{ + .type = flags.type, + .format = flags.image_format, + .is_written = is_written, + .is_read = is_read, + .cbuf_index = cbuf.index, + .cbuf_offset = cbuf.offset, + .count = cbuf.count, + .size_shift = DESCRIPTOR_SIZE_SHIFT, + }); + } + break; + } + default: + if (flags.type == TextureType::Buffer) { + index = descriptors.Add(TextureBufferDescriptor{ + .has_secondary = cbuf.has_secondary, + .cbuf_index = cbuf.index, + .cbuf_offset = cbuf.offset, + .secondary_cbuf_index = cbuf.secondary_index, + .secondary_cbuf_offset = cbuf.secondary_offset, + .count = cbuf.count, + .size_shift = DESCRIPTOR_SIZE_SHIFT, + }); + } else { + index = descriptors.Add(TextureDescriptor{ + .type = flags.type, + .is_depth = flags.is_depth != 0, + .has_secondary = cbuf.has_secondary, + .cbuf_index = cbuf.index, + .cbuf_offset 
= cbuf.offset, + .secondary_cbuf_index = cbuf.secondary_index, + .secondary_cbuf_offset = cbuf.secondary_offset, + .count = cbuf.count, + .size_shift = DESCRIPTOR_SIZE_SHIFT, + }); + } + break; + } + flags.descriptor_index.Assign(index); + inst->SetFlags(flags); + + if (cbuf.count > 1) { + const auto insert_point{IR::Block::InstructionList::s_iterator_to(*inst)}; + IR::IREmitter ir{*texture_inst.block, insert_point}; + const IR::U32 shift{ir.Imm32(std::countr_zero(DESCRIPTOR_SIZE))}; + inst->SetArg(0, ir.ShiftRightArithmetic(cbuf.dynamic_offset, shift)); + } else { + inst->SetArg(0, IR::Value{}); + } + } +} + +void JoinTextureInfo(Info& base, Info& source) { + Descriptors descriptors{ + base.texture_buffer_descriptors, + base.image_buffer_descriptors, + base.texture_descriptors, + base.image_descriptors, + }; + for (auto& desc : source.texture_buffer_descriptors) { + descriptors.Add(desc); + } + for (auto& desc : source.image_buffer_descriptors) { + descriptors.Add(desc); + } + for (auto& desc : source.texture_descriptors) { + descriptors.Add(desc); + } + for (auto& desc : source.image_descriptors) { + descriptors.Add(desc); + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp new file mode 100644 index 000000000..975d5aadf --- /dev/null +++ b/src/shader_recompiler/ir_opt/verification_pass.cpp @@ -0,0 +1,98 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <map> +#include <set> + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { + +static void ValidateTypes(const IR::Program& program) { + for (const auto& block : program.blocks) { + for (const IR::Inst& inst : *block) { + if (inst.GetOpcode() == IR::Opcode::Phi) { + // Skip validation on phi nodes + continue; + } + const size_t num_args{inst.NumArgs()}; + for (size_t i = 0; i < num_args; ++i) { + const IR::Type t1{inst.Arg(i).Type()}; + const IR::Type t2{IR::ArgTypeOf(inst.GetOpcode(), i)}; + if (!IR::AreTypesCompatible(t1, t2)) { + throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(*block)); + } + } + } + } +} + +static void ValidateUses(const IR::Program& program) { + std::map<IR::Inst*, int> actual_uses; + for (const auto& block : program.blocks) { + for (const IR::Inst& inst : *block) { + const size_t num_args{inst.NumArgs()}; + for (size_t i = 0; i < num_args; ++i) { + const IR::Value arg{inst.Arg(i)}; + if (!arg.IsImmediate()) { + ++actual_uses[arg.Inst()]; + } + } + } + } + for (const auto [inst, uses] : actual_uses) { + if (inst->UseCount() != uses) { + throw LogicError("Invalid uses in block: {}", IR::DumpProgram(program)); + } + } +} + +static void ValidateForwardDeclarations(const IR::Program& program) { + std::set<const IR::Inst*> definitions; + for (const IR::Block* const block : program.blocks) { + for (const IR::Inst& inst : *block) { + definitions.emplace(&inst); + if (inst.GetOpcode() == IR::Opcode::Phi) { + // Phi nodes can have forward declarations + continue; + } + const size_t num_args{inst.NumArgs()}; + for (size_t arg = 0; arg < num_args; ++arg) { + if (inst.Arg(arg).IsImmediate()) { + continue; + } + if (!definitions.contains(inst.Arg(arg).Inst())) { + throw LogicError("Forward declaration in block: {}", 
IR::DumpBlock(*block)); + } + } + } +} + +static void ValidatePhiNodes(const IR::Program& program) { + for (const IR::Block* const block : program.blocks) { + bool no_more_phis{false}; + for (const IR::Inst& inst : *block) { + if (inst.GetOpcode() == IR::Opcode::Phi) { + if (no_more_phis) { + throw LogicError("Interleaved phi nodes: {}", IR::DumpBlock(*block)); + } + } else { + no_more_phis = true; + } + } + } +} + +void VerificationPass(const IR::Program& program) { + ValidateTypes(program); + ValidateUses(program); + ValidateForwardDeclarations(program); + ValidatePhiNodes(program); +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/object_pool.h b/src/shader_recompiler/object_pool.h new file mode 100644 index 000000000..f8b255b66 --- /dev/null +++ b/src/shader_recompiler/object_pool.h @@ -0,0 +1,104 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <type_traits> +#include <utility> + +namespace Shader { + +template <typename T> +requires std::is_destructible_v<T> class ObjectPool { +public: + explicit ObjectPool(size_t chunk_size = 8192) : new_chunk_size{chunk_size} { + node = &chunks.emplace_back(new_chunk_size); + } + + template <typename... Args> + requires std::is_constructible_v<T, Args...>[[nodiscard]] T* Create(Args&&... args) { + return std::construct_at(Memory(), std::forward<Args>(args)...); + } + + void ReleaseContents() { + if (chunks.empty()) { + return; + } + Chunk& root{chunks.front()}; + if (root.used_objects == root.num_objects) { + // Root chunk has been filled, squash allocations into it + const size_t total_objects{root.num_objects + new_chunk_size * (chunks.size() - 1)}; + chunks.clear(); + chunks.emplace_back(total_objects); + } else { + root.Release(); + chunks.resize(1); + } + chunks.shrink_to_fit(); + node = &chunks.front(); + } + +private: + struct NonTrivialDummy { + NonTrivialDummy() noexcept {} + }; + + union Storage { + Storage() noexcept {} + ~Storage() noexcept {} + + NonTrivialDummy dummy{}; + T object; + }; + + struct Chunk { + explicit Chunk() = default; + explicit Chunk(size_t size) + : num_objects{size}, storage{std::make_unique<Storage[]>(size)} {} + + Chunk& operator=(Chunk&& rhs) noexcept { + Release(); + used_objects = std::exchange(rhs.used_objects, 0); + num_objects = std::exchange(rhs.num_objects, 0); + storage = std::move(rhs.storage); + return *this; + } + + Chunk(Chunk&& rhs) noexcept + : used_objects{std::exchange(rhs.used_objects, 0)}, + num_objects{std::exchange(rhs.num_objects, 0)}, storage{std::move(rhs.storage)} {} + + ~Chunk() { + Release(); + } + + void Release() { + std::destroy_n(storage.get(), used_objects); + used_objects = 0; + } + + size_t used_objects{}; + size_t num_objects{}; + std::unique_ptr<Storage[]> storage; + }; + + [[nodiscard]] T* Memory() { + Chunk* const chunk{FreeChunk()}; + return &chunk->storage[chunk->used_objects++].object; + } + + [[nodiscard]] Chunk* FreeChunk() { + if (node->used_objects != node->num_objects) { + return node; + } + node = &chunks.emplace_back(new_chunk_size); + return node; + } + + Chunk* node{}; + std::vector<Chunk> chunks; + size_t new_chunk_size{}; +}; + +} // namespace Shader diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h new file mode 100644 index 000000000..f0c3b3b17 --- /dev/null +++ b/src/shader_recompiler/profile.h @@ -0,0 +1,74 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any
later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" + +namespace Shader { + +struct Profile { + u32 supported_spirv{0x00010000}; + + bool unified_descriptor_binding{}; + bool support_descriptor_aliasing{}; + bool support_int8{}; + bool support_int16{}; + bool support_int64{}; + bool support_vertex_instance_id{}; + bool support_float_controls{}; + bool support_separate_denorm_behavior{}; + bool support_separate_rounding_mode{}; + bool support_fp16_denorm_preserve{}; + bool support_fp32_denorm_preserve{}; + bool support_fp16_denorm_flush{}; + bool support_fp32_denorm_flush{}; + bool support_fp16_signed_zero_nan_preserve{}; + bool support_fp32_signed_zero_nan_preserve{}; + bool support_fp64_signed_zero_nan_preserve{}; + bool support_explicit_workgroup_layout{}; + bool support_vote{}; + bool support_viewport_index_layer_non_geometry{}; + bool support_viewport_mask{}; + bool support_typeless_image_loads{}; + bool support_demote_to_helper_invocation{}; + bool support_int64_atomics{}; + bool support_derivative_control{}; + bool support_geometry_shader_passthrough{}; + bool support_gl_nv_gpu_shader_5{}; + bool support_gl_amd_gpu_shader_half_float{}; + bool support_gl_texture_shadow_lod{}; + bool support_gl_warp_intrinsics{}; + bool support_gl_variable_aoffi{}; + bool support_gl_sparse_textures{}; + bool support_gl_derivative_control{}; + + bool warp_size_potentially_larger_than_guest{}; + + bool lower_left_origin_mode{}; + /// Fragment outputs have to be declared even if they are not written to avoid undefined values. + /// See Ori and the Blind Forest's main menu for reference. + bool need_declared_frag_colors{}; + /// Prevents fast math optimizations that may cause inaccuracies + bool need_fastmath_off{}; + + /// OpFClamp is broken and OpFMax + OpFMin should be used instead + bool has_broken_spirv_clamp{}; + /// Offset image operands with an unsigned type do not work + bool has_broken_unsigned_image_offsets{}; + /// Signed instructions with unsigned data types are misinterpreted + bool has_broken_signed_operations{}; + /// Float controls break when fp16 is enabled + bool has_broken_fp16_float_controls{}; + /// Dynamic vec4 indexing is broken on some OpenGL drivers + bool has_gl_component_indexing_bug{}; + /// The precise type qualifier is broken in the fragment stage of some drivers + bool has_gl_precise_bug{}; + /// Ignores SPIR-V ordered vs unordered using GLSL semantics + bool ignore_nan_fp_comparisons{}; + + u32 gl_max_compute_smem_size{}; +}; + +} // namespace Shader diff --git a/src/shader_recompiler/program_header.h b/src/shader_recompiler/program_header.h new file mode 100644 index 000000000..bd6c2bfb5 --- /dev/null +++ b/src/shader_recompiler/program_header.h @@ -0,0 +1,219 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
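Profile, defined just above, is a plain bag of capability and workaround flags that the backends consult. As a rough illustration of the kind of branch a backend might take on has_broken_spirv_clamp, here is a hypothetical helper using plain floats in place of emitted SPIR-V ops (EmitClamp and EmitMinMax are made-up names, not part of the emitters):

#include <algorithm>
#include <cstdio>

struct Profile {
    // Mirrors the flag documented above: OpFClamp is broken and
    // OpFMax + OpFMin should be used instead.
    bool has_broken_spirv_clamp{};
};

// Stand-ins for emitted operations; a real backend would emit IR instead.
float EmitClamp(float value, float lo, float hi) {
    return std::clamp(value, lo, hi);
}
float EmitMinMax(float value, float lo, float hi) {
    return std::min(std::max(value, lo), hi);
}

float Clamp(const Profile& profile, float value, float lo, float hi) {
    // Take the workaround path only when the driver's clamp is unreliable.
    return profile.has_broken_spirv_clamp ? EmitMinMax(value, lo, hi)
                                          : EmitClamp(value, lo, hi);
}

int main() {
    const Profile broken{.has_broken_spirv_clamp = true};
    std::printf("%f\n", Clamp(broken, 2.0f, 0.0f, 1.0f)); // prints 1.0
}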
+ +#pragma once + +#include <array> +#include <optional> + +#include "common/bit_field.h" +#include "common/common_funcs.h" +#include "common/common_types.h" + +namespace Shader { + +enum class OutputTopology : u32 { + PointList = 1, + LineStrip = 6, + TriangleStrip = 7, +}; + +enum class PixelImap : u8 { + Unused = 0, + Constant = 1, + Perspective = 2, + ScreenLinear = 3, +}; + +// Documentation in: +// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html +struct ProgramHeader { + union { + BitField<0, 5, u32> sph_type; + BitField<5, 5, u32> version; + BitField<10, 4, u32> shader_type; + BitField<14, 1, u32> mrt_enable; + BitField<15, 1, u32> kills_pixels; + BitField<16, 1, u32> does_global_store; + BitField<17, 4, u32> sass_version; + BitField<21, 2, u32> reserved1; + BitField<24, 1, u32> geometry_passthrough; + BitField<25, 1, u32> reserved2; + BitField<26, 1, u32> does_load_or_store; + BitField<27, 1, u32> does_fp64; + BitField<28, 4, u32> stream_out_mask; + } common0; + + union { + BitField<0, 24, u32> shader_local_memory_low_size; + BitField<24, 8, u32> per_patch_attribute_count; + } common1; + + union { + BitField<0, 24, u32> shader_local_memory_high_size; + BitField<24, 8, u32> threads_per_input_primitive; + } common2; + + union { + BitField<0, 24, u32> shader_local_memory_crs_size; + BitField<24, 4, OutputTopology> output_topology; + BitField<28, 4, u32> reserved; + } common3; + + union { + BitField<0, 12, u32> max_output_vertices; + BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. + BitField<20, 4, u32> reserved; + BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders. + } common4; + + union { + struct { + INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA + + union { + BitField<0, 1, u8> primitive_array_id; + BitField<1, 1, u8> rt_array_index; + BitField<2, 1, u8> viewport_index; + BitField<3, 1, u8> point_size; + BitField<4, 1, u8> position_x; + BitField<5, 1, u8> position_y; + BitField<6, 1, u8> position_z; + BitField<7, 1, u8> position_w; + u8 raw; + } imap_systemb; + + std::array<u8, 16> imap_generic_vector; + + INSERT_PADDING_BYTES_NOINIT(2); // ImapColor + union { + BitField<0, 8, u16> clip_distances; + BitField<8, 1, u16> point_sprite_s; + BitField<9, 1, u16> point_sprite_t; + BitField<10, 1, u16> fog_coordinate; + BitField<12, 1, u16> tessellation_eval_point_u; + BitField<13, 1, u16> tessellation_eval_point_v; + BitField<14, 1, u16> instance_id; + BitField<15, 1, u16> vertex_id; + }; + INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10] + INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved + INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA + + union { + BitField<0, 1, u8> primitive_array_id; + BitField<1, 1, u8> rt_array_index; + BitField<2, 1, u8> viewport_index; + BitField<3, 1, u8> point_size; + BitField<4, 1, u8> position_x; + BitField<5, 1, u8> position_y; + BitField<6, 1, u8> position_z; + BitField<7, 1, u8> position_w; + u8 raw; + } omap_systemb; + + std::array<u8, 16> omap_generic_vector; + + INSERT_PADDING_BYTES_NOINIT(2); // OmapColor + + union { + BitField<0, 8, u16> clip_distances; + BitField<8, 1, u16> point_sprite_s; + BitField<9, 1, u16> point_sprite_t; + BitField<10, 1, u16> fog_coordinate; + BitField<12, 1, u16> tessellation_eval_point_u; + BitField<13, 1, u16> tessellation_eval_point_v; + BitField<14, 1, u16> instance_id; + BitField<15, 1, u16> vertex_id; + } omap_systemc; + + INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10] + 
INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved + + [[nodiscard]] std::array<bool, 4> InputGeneric(size_t index) const noexcept { + const int data{imap_generic_vector[index >> 1] >> ((index % 2) * 4)}; + return { + (data & 1) != 0, + (data & 2) != 0, + (data & 4) != 0, + (data & 8) != 0, + }; + } + + [[nodiscard]] std::array<bool, 4> OutputGeneric(size_t index) const noexcept { + const int data{omap_generic_vector[index >> 1] >> ((index % 2) * 4)}; + return { + (data & 1) != 0, + (data & 2) != 0, + (data & 4) != 0, + (data & 8) != 0, + }; + } + } vtg; + + struct { + INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA + + union { + BitField<0, 1, u8> primitive_array_id; + BitField<1, 1, u8> rt_array_index; + BitField<2, 1, u8> viewport_index; + BitField<3, 1, u8> point_size; + BitField<4, 1, u8> position_x; + BitField<5, 1, u8> position_y; + BitField<6, 1, u8> position_z; + BitField<7, 1, u8> position_w; + BitField<0, 4, u8> first; + BitField<4, 4, u8> position; + u8 raw; + } imap_systemb; + + union { + BitField<0, 2, PixelImap> x; + BitField<2, 2, PixelImap> y; + BitField<4, 2, PixelImap> z; + BitField<6, 2, PixelImap> w; + u8 raw; + } imap_generic_vector[32]; + + INSERT_PADDING_BYTES_NOINIT(2); // ImapColor + INSERT_PADDING_BYTES_NOINIT(2); // ImapSystemValuesC + INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10] + INSERT_PADDING_BYTES_NOINIT(2); // ImapReserved + + struct { + u32 target; + union { + BitField<0, 1, u32> sample_mask; + BitField<1, 1, u32> depth; + BitField<2, 30, u32> reserved; + }; + } omap; + + [[nodiscard]] std::array<bool, 4> EnabledOutputComponents(u32 rt) const noexcept { + const u32 bits{omap.target >> (rt * 4)}; + return {(bits & 1) != 0, (bits & 2) != 0, (bits & 4) != 0, (bits & 8) != 0}; + } + + [[nodiscard]] std::array<PixelImap, 4> GenericInputMap(u32 attribute) const { + const auto& vector{imap_generic_vector[attribute]}; + return {vector.x, vector.y, vector.z, vector.w}; + } + + [[nodiscard]] bool IsGenericVectorActive(size_t index) const { + return imap_generic_vector[index].raw != 0; + } + } ps; + + std::array<u32, 0xf> raw; + }; + + [[nodiscard]] u64 LocalMemorySize() const noexcept { + return static_cast<u64>(common1.shader_local_memory_low_size) | + (static_cast<u64>(common2.shader_local_memory_high_size) << 24); + } +}; +static_assert(sizeof(ProgramHeader) == 0x50, "Incorrect structure size"); + +} // namespace Shader diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h new file mode 100644 index 000000000..f3f83a258 --- /dev/null +++ b/src/shader_recompiler/runtime_info.h @@ -0,0 +1,88 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
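ProgramHeader above packs one nibble per generic attribute (see InputGeneric and OutputGeneric) and splits the local memory size across two fields joined at bit 24 (LocalMemorySize). A standalone restatement of both decodings with plain integers, following the same bit layout as the struct:

#include <array>
#include <cassert>
#include <cstdint>

// One nibble per generic attribute: bit n of the nibble enables component n (x, y, z, w).
std::array<bool, 4> InputGeneric(const std::array<uint8_t, 16>& imap, size_t index) {
    const int data = imap[index >> 1] >> ((index % 2) * 4);
    return {(data & 1) != 0, (data & 2) != 0, (data & 4) != 0, (data & 8) != 0};
}

// The low field holds bits 0..23, the high field is shifted up by 24.
uint64_t LocalMemorySize(uint32_t low_24_bits, uint32_t high_bits) {
    return static_cast<uint64_t>(low_24_bits) | (static_cast<uint64_t>(high_bits) << 24);
}

int main() {
    std::array<uint8_t, 16> imap{};
    imap[0] = 0x3A; // attribute 0 -> nibble 0xA (y, w), attribute 1 -> nibble 0x3 (x, y)
    assert((InputGeneric(imap, 0) == std::array<bool, 4>{false, true, false, true}));
    assert((InputGeneric(imap, 1) == std::array<bool, 4>{true, true, false, false}));
    assert(LocalMemorySize(0x123456, 0x1) == 0x1123456);
}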
+ +#pragma once + +#include <array> +#include <bitset> +#include <optional> +#include <vector> + +#include "common/common_types.h" +#include "shader_recompiler/varying_state.h" + +namespace Shader { + +enum class AttributeType : u8 { + Float, + SignedInt, + UnsignedInt, + Disabled, +}; + +enum class InputTopology { + Points, + Lines, + LinesAdjacency, + Triangles, + TrianglesAdjacency, +}; + +enum class CompareFunction { + Never, + Less, + Equal, + LessThanEqual, + Greater, + NotEqual, + GreaterThanEqual, + Always, +}; + +enum class TessPrimitive { + Isolines, + Triangles, + Quads, +}; + +enum class TessSpacing { + Equal, + FractionalOdd, + FractionalEven, +}; + +struct TransformFeedbackVarying { + u32 buffer{}; + u32 stride{}; + u32 offset{}; + u32 components{}; +}; + +struct RuntimeInfo { + std::array<AttributeType, 32> generic_input_types{}; + VaryingState previous_stage_stores; + + bool convert_depth_mode{}; + bool force_early_z{}; + + TessPrimitive tess_primitive{}; + TessSpacing tess_spacing{}; + bool tess_clockwise{}; + + InputTopology input_topology{}; + + std::optional<float> fixed_state_point_size; + std::optional<CompareFunction> alpha_test_func; + float alpha_test_reference{}; + + /// Static Y negate value + bool y_negate{}; + /// Use storage buffers instead of global pointers on GLASM + bool glasm_use_storage_buffers{}; + + /// Transform feedback state for each varying + std::vector<TransformFeedbackVarying> xfb_varyings; +}; + +} // namespace Shader diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h new file mode 100644 index 000000000..4ef4dbd40 --- /dev/null +++ b/src/shader_recompiler/shader_info.h @@ -0,0 +1,193 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
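RuntimeInfo, shown above, carries fixed-function state the compiled shader has to honour, including the optional alpha test described by alpha_test_func and alpha_test_reference. Assuming CompareFunction follows the usual graphics comparison semantics, a small illustrative helper showing what each enumerator means for a fragment's alpha (this is not the emitter's code path, just the intended semantics):

#include <cassert>

enum class CompareFunction {
    Never, Less, Equal, LessThanEqual, Greater, NotEqual, GreaterThanEqual, Always,
};

// Returns true when the fragment survives the alpha test.
bool AlphaTestPasses(CompareFunction func, float alpha, float reference) {
    switch (func) {
    case CompareFunction::Never:            return false;
    case CompareFunction::Less:             return alpha < reference;
    case CompareFunction::Equal:            return alpha == reference;
    case CompareFunction::LessThanEqual:    return alpha <= reference;
    case CompareFunction::Greater:          return alpha > reference;
    case CompareFunction::NotEqual:         return alpha != reference;
    case CompareFunction::GreaterThanEqual: return alpha >= reference;
    case CompareFunction::Always:           return true;
    }
    return true;
}

int main() {
    assert(AlphaTestPasses(CompareFunction::Greater, 0.75f, 0.5f));
    assert(!AlphaTestPasses(CompareFunction::Less, 0.75f, 0.5f));
}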
+ +#pragma once + +#include <array> +#include <bitset> + +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/type.h" +#include "shader_recompiler/varying_state.h" + +#include <boost/container/small_vector.hpp> +#include <boost/container/static_vector.hpp> + +namespace Shader { + +enum class TextureType : u32 { + Color1D, + ColorArray1D, + Color2D, + ColorArray2D, + Color3D, + ColorCube, + ColorArrayCube, + Buffer, +}; +constexpr u32 NUM_TEXTURE_TYPES = 8; + +enum class ImageFormat : u32 { + Typeless, + R8_UINT, + R8_SINT, + R16_UINT, + R16_SINT, + R32_UINT, + R32G32_UINT, + R32G32B32A32_UINT, +}; + +enum class Interpolation { + Smooth, + Flat, + NoPerspective, +}; + +struct ConstantBufferDescriptor { + u32 index; + u32 count; +}; + +struct StorageBufferDescriptor { + u32 cbuf_index; + u32 cbuf_offset; + u32 count; + bool is_written; +}; + +struct TextureBufferDescriptor { + bool has_secondary; + u32 cbuf_index; + u32 cbuf_offset; + u32 secondary_cbuf_index; + u32 secondary_cbuf_offset; + u32 count; + u32 size_shift; +}; +using TextureBufferDescriptors = boost::container::small_vector<TextureBufferDescriptor, 6>; + +struct ImageBufferDescriptor { + ImageFormat format; + bool is_written; + bool is_read; + u32 cbuf_index; + u32 cbuf_offset; + u32 count; + u32 size_shift; +}; +using ImageBufferDescriptors = boost::container::small_vector<ImageBufferDescriptor, 2>; + +struct TextureDescriptor { + TextureType type; + bool is_depth; + bool has_secondary; + u32 cbuf_index; + u32 cbuf_offset; + u32 secondary_cbuf_index; + u32 secondary_cbuf_offset; + u32 count; + u32 size_shift; +}; +using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>; + +struct ImageDescriptor { + TextureType type; + ImageFormat format; + bool is_written; + bool is_read; + u32 cbuf_index; + u32 cbuf_offset; + u32 count; + u32 size_shift; +}; +using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>; + +struct Info { + static constexpr size_t MAX_CBUFS{18}; + static constexpr size_t MAX_SSBOS{32}; + + bool uses_workgroup_id{}; + bool uses_local_invocation_id{}; + bool uses_invocation_id{}; + bool uses_sample_id{}; + bool uses_is_helper_invocation{}; + bool uses_subgroup_invocation_id{}; + bool uses_subgroup_shuffles{}; + std::array<bool, 30> uses_patches{}; + + std::array<Interpolation, 32> interpolation{}; + VaryingState loads; + VaryingState stores; + VaryingState passthrough; + + bool loads_indexed_attributes{}; + + std::array<bool, 8> stores_frag_color{}; + bool stores_sample_mask{}; + bool stores_frag_depth{}; + + bool stores_tess_level_outer{}; + bool stores_tess_level_inner{}; + + bool stores_indexed_attributes{}; + + bool stores_global_memory{}; + + bool uses_fp16{}; + bool uses_fp64{}; + bool uses_fp16_denorms_flush{}; + bool uses_fp16_denorms_preserve{}; + bool uses_fp32_denorms_flush{}; + bool uses_fp32_denorms_preserve{}; + bool uses_int8{}; + bool uses_int16{}; + bool uses_int64{}; + bool uses_image_1d{}; + bool uses_sampled_1d{}; + bool uses_sparse_residency{}; + bool uses_demote_to_helper_invocation{}; + bool uses_subgroup_vote{}; + bool uses_subgroup_mask{}; + bool uses_fswzadd{}; + bool uses_derivatives{}; + bool uses_typeless_image_reads{}; + bool uses_typeless_image_writes{}; + bool uses_image_buffers{}; + bool uses_shared_increment{}; + bool uses_shared_decrement{}; + bool uses_global_increment{}; + bool uses_global_decrement{}; + bool uses_atomic_f32_add{}; + bool uses_atomic_f16x2_add{}; + bool uses_atomic_f16x2_min{}; + bool 
uses_atomic_f16x2_max{}; + bool uses_atomic_f32x2_add{}; + bool uses_atomic_f32x2_min{}; + bool uses_atomic_f32x2_max{}; + bool uses_atomic_s32_min{}; + bool uses_atomic_s32_max{}; + bool uses_int64_bit_atomics{}; + bool uses_global_memory{}; + bool uses_atomic_image_u32{}; + bool uses_shadow_lod{}; + + IR::Type used_constant_buffer_types{}; + IR::Type used_storage_buffer_types{}; + + u32 constant_buffer_mask{}; + std::array<u32, MAX_CBUFS> constant_buffer_used_sizes{}; + u32 nvn_buffer_base{}; + std::bitset<16> nvn_buffer_used{}; + + boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS> + constant_buffer_descriptors; + boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS> storage_buffers_descriptors; + TextureBufferDescriptors texture_buffer_descriptors; + ImageBufferDescriptors image_buffer_descriptors; + TextureDescriptors texture_descriptors; + ImageDescriptors image_descriptors; +}; + +} // namespace Shader diff --git a/src/shader_recompiler/stage.h b/src/shader_recompiler/stage.h new file mode 100644 index 000000000..5c1c8d8fc --- /dev/null +++ b/src/shader_recompiler/stage.h @@ -0,0 +1,28 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" + +namespace Shader { + +enum class Stage : u32 { + VertexB, + TessellationControl, + TessellationEval, + Geometry, + Fragment, + + Compute, + + VertexA, +}; +constexpr u32 MaxStageTypes = 6; + +[[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept { + return static_cast<Stage>(static_cast<size_t>(Stage::VertexB) + index); +} + +} // namespace Shader diff --git a/src/shader_recompiler/varying_state.h b/src/shader_recompiler/varying_state.h new file mode 100644 index 000000000..9d7b24a76 --- /dev/null +++ b/src/shader_recompiler/varying_state.h @@ -0,0 +1,69 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
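stage.h above orders the Stage enumerators so that a zero-based pipeline index maps directly onto them through StageFromIndex, with Compute following the graphics stages and the dual-vertex VertexA stage placed last. A compile-time check of that mapping, restating the enum locally for the purpose of the check:

#include <cstddef>
#include <cstdint>

// Restated locally; mirrors the declaration order in stage.h above.
enum class Stage : uint32_t {
    VertexB, TessellationControl, TessellationEval, Geometry, Fragment, Compute, VertexA,
};

constexpr Stage StageFromIndex(size_t index) noexcept {
    return static_cast<Stage>(static_cast<size_t>(Stage::VertexB) + index);
}

static_assert(StageFromIndex(0) == Stage::VertexB);
static_assert(StageFromIndex(3) == Stage::Geometry);
static_assert(StageFromIndex(4) == Stage::Fragment);
static_assert(StageFromIndex(5) == Stage::Compute);

int main() {}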
+ +#pragma once + +#include <bitset> +#include <cstddef> + +#include "shader_recompiler/frontend/ir/attribute.h" + +namespace Shader { + +struct VaryingState { + std::bitset<256> mask{}; + + void Set(IR::Attribute attribute, bool state = true) { + mask[static_cast<size_t>(attribute)] = state; + } + + [[nodiscard]] bool operator[](IR::Attribute attribute) const noexcept { + return mask[static_cast<size_t>(attribute)]; + } + + [[nodiscard]] bool AnyComponent(IR::Attribute base) const noexcept { + return mask[static_cast<size_t>(base) + 0] || mask[static_cast<size_t>(base) + 1] || + mask[static_cast<size_t>(base) + 2] || mask[static_cast<size_t>(base) + 3]; + } + + [[nodiscard]] bool AllComponents(IR::Attribute base) const noexcept { + return mask[static_cast<size_t>(base) + 0] && mask[static_cast<size_t>(base) + 1] && + mask[static_cast<size_t>(base) + 2] && mask[static_cast<size_t>(base) + 3]; + } + + [[nodiscard]] bool IsUniform(IR::Attribute base) const noexcept { + return AnyComponent(base) == AllComponents(base); + } + + [[nodiscard]] bool Generic(size_t index, size_t component) const noexcept { + return mask[static_cast<size_t>(IR::Attribute::Generic0X) + index * 4 + component]; + } + + [[nodiscard]] bool Generic(size_t index) const noexcept { + return Generic(index, 0) || Generic(index, 1) || Generic(index, 2) || Generic(index, 3); + } + + [[nodiscard]] bool ClipDistances() const noexcept { + return AnyComponent(IR::Attribute::ClipDistance0) || + AnyComponent(IR::Attribute::ClipDistance4); + } + + [[nodiscard]] bool Legacy() const noexcept { + return AnyComponent(IR::Attribute::ColorFrontDiffuseR) || + AnyComponent(IR::Attribute::ColorFrontSpecularR) || + AnyComponent(IR::Attribute::ColorBackDiffuseR) || + AnyComponent(IR::Attribute::ColorBackSpecularR) || FixedFunctionTexture(); + } + + [[nodiscard]] bool FixedFunctionTexture() const noexcept { + for (size_t index = 0; index < 10; ++index) { + if (AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) { + return true; + } + } + return false; + } +}; + +} // namespace Shader diff --git a/src/tests/common/unique_function.cpp b/src/tests/common/unique_function.cpp index ac9912738..aa6e86593 100644 --- a/src/tests/common/unique_function.cpp +++ b/src/tests/common/unique_function.cpp @@ -17,10 +17,12 @@ struct Noisy { Noisy& operator=(Noisy&& rhs) noexcept { state = "Move assigned"; rhs.state = "Moved away"; + return *this; } Noisy(const Noisy&) : state{"Copied constructed"} {} Noisy& operator=(const Noisy&) { state = "Copied assigned"; + return *this; } std::string state; diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index e4de55f4d..007ecc13e 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -29,7 +29,6 @@ add_library(video_core STATIC dirty_flags.h dma_pusher.cpp dma_pusher.h - engines/const_buffer_engine_interface.h engines/const_buffer_info.h engines/engine_interface.h engines/engine_upload.cpp @@ -44,9 +43,6 @@ add_library(video_core STATIC engines/maxwell_3d.h engines/maxwell_dma.cpp engines/maxwell_dma.h - engines/shader_bytecode.h - engines/shader_header.h - engines/shader_type.h framebuffer_config.h macro/macro.cpp macro/macro.h @@ -61,8 +57,6 @@ add_library(video_core STATIC gpu.h gpu_thread.cpp gpu_thread.h - guest_driver.cpp - guest_driver.h memory_manager.cpp memory_manager.h query_cache.h @@ -71,26 +65,25 @@ add_library(video_core STATIC rasterizer_interface.h renderer_base.cpp renderer_base.h - renderer_opengl/gl_arb_decompiler.cpp - 
renderer_opengl/gl_arb_decompiler.h renderer_opengl/gl_buffer_cache.cpp renderer_opengl/gl_buffer_cache.h + renderer_opengl/gl_compute_pipeline.cpp + renderer_opengl/gl_compute_pipeline.h renderer_opengl/gl_device.cpp renderer_opengl/gl_device.h renderer_opengl/gl_fence_manager.cpp renderer_opengl/gl_fence_manager.h + renderer_opengl/gl_graphics_pipeline.cpp + renderer_opengl/gl_graphics_pipeline.h renderer_opengl/gl_rasterizer.cpp renderer_opengl/gl_rasterizer.h renderer_opengl/gl_resource_manager.cpp renderer_opengl/gl_resource_manager.h renderer_opengl/gl_shader_cache.cpp renderer_opengl/gl_shader_cache.h - renderer_opengl/gl_shader_decompiler.cpp - renderer_opengl/gl_shader_decompiler.h - renderer_opengl/gl_shader_disk_cache.cpp - renderer_opengl/gl_shader_disk_cache.h renderer_opengl/gl_shader_manager.cpp renderer_opengl/gl_shader_manager.h + renderer_opengl/gl_shader_context.h renderer_opengl/gl_shader_util.cpp renderer_opengl/gl_shader_util.h renderer_opengl/gl_state_tracker.cpp @@ -112,6 +105,7 @@ add_library(video_core STATIC renderer_vulkan/fixed_pipeline_state.h renderer_vulkan/maxwell_to_vk.cpp renderer_vulkan/maxwell_to_vk.h + renderer_vulkan/pipeline_helper.h renderer_vulkan/renderer_vulkan.h renderer_vulkan/renderer_vulkan.cpp renderer_vulkan/vk_blit_screen.cpp @@ -138,12 +132,12 @@ add_library(video_core STATIC renderer_vulkan/vk_query_cache.h renderer_vulkan/vk_rasterizer.cpp renderer_vulkan/vk_rasterizer.h + renderer_vulkan/vk_render_pass_cache.cpp + renderer_vulkan/vk_render_pass_cache.h renderer_vulkan/vk_resource_pool.cpp renderer_vulkan/vk_resource_pool.h renderer_vulkan/vk_scheduler.cpp renderer_vulkan/vk_scheduler.h - renderer_vulkan/vk_shader_decompiler.cpp - renderer_vulkan/vk_shader_decompiler.h renderer_vulkan/vk_shader_util.cpp renderer_vulkan/vk_shader_util.h renderer_vulkan/vk_staging_buffer_pool.cpp @@ -156,60 +150,12 @@ add_library(video_core STATIC renderer_vulkan/vk_texture_cache.h renderer_vulkan/vk_update_descriptor.cpp renderer_vulkan/vk_update_descriptor.h + shader_cache.cpp shader_cache.h + shader_environment.cpp + shader_environment.h shader_notify.cpp shader_notify.h - shader/decode/arithmetic.cpp - shader/decode/arithmetic_immediate.cpp - shader/decode/bfe.cpp - shader/decode/bfi.cpp - shader/decode/shift.cpp - shader/decode/arithmetic_integer.cpp - shader/decode/arithmetic_integer_immediate.cpp - shader/decode/arithmetic_half.cpp - shader/decode/arithmetic_half_immediate.cpp - shader/decode/ffma.cpp - shader/decode/hfma2.cpp - shader/decode/conversion.cpp - shader/decode/memory.cpp - shader/decode/texture.cpp - shader/decode/image.cpp - shader/decode/float_set_predicate.cpp - shader/decode/integer_set_predicate.cpp - shader/decode/half_set_predicate.cpp - shader/decode/predicate_set_register.cpp - shader/decode/predicate_set_predicate.cpp - shader/decode/register_set_predicate.cpp - shader/decode/float_set.cpp - shader/decode/integer_set.cpp - shader/decode/half_set.cpp - shader/decode/video.cpp - shader/decode/warp.cpp - shader/decode/xmad.cpp - shader/decode/other.cpp - shader/ast.cpp - shader/ast.h - shader/async_shaders.cpp - shader/async_shaders.h - shader/compiler_settings.cpp - shader/compiler_settings.h - shader/control_flow.cpp - shader/control_flow.h - shader/decode.cpp - shader/expr.cpp - shader/expr.h - shader/memory_util.cpp - shader/memory_util.h - shader/node_helper.cpp - shader/node_helper.h - shader/node.h - shader/registry.cpp - shader/registry.h - shader/shader_ir.cpp - shader/shader_ir.h - shader/track.cpp - 
shader/transform_feedback.cpp - shader/transform_feedback.h surface.cpp surface.h texture_cache/accelerated_swizzle.cpp @@ -242,6 +188,8 @@ add_library(video_core STATIC textures/decoders.h textures/texture.cpp textures/texture.h + transform_feedback.cpp + transform_feedback.h video_core.cpp video_core.h vulkan_common/vulkan_debug_callback.cpp @@ -265,7 +213,7 @@ add_library(video_core STATIC create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC common core) -target_link_libraries(video_core PRIVATE glad xbyak) +target_link_libraries(video_core PUBLIC glad shader_recompiler xbyak) if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32) add_dependencies(video_core ffmpeg-build) diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 5a0b6f0c0..24c858104 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -31,6 +31,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" +#include "video_core/surface.h" #include "video_core/texture_cache/slot_vector.h" #include "video_core/texture_cache/types.h" @@ -42,14 +43,19 @@ MICROPROFILE_DECLARE(GPU_DownloadMemory); using BufferId = SlotId; +using VideoCore::Surface::PixelFormat; +using namespace Common::Literals; + constexpr u32 NUM_VERTEX_BUFFERS = 32; constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4; constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18; constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8; constexpr u32 NUM_STORAGE_BUFFERS = 16; +constexpr u32 NUM_TEXTURE_BUFFERS = 16; constexpr u32 NUM_STAGES = 5; -using namespace Common::Literals; +using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>; +using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>; template <typename P> class BufferCache { @@ -67,6 +73,7 @@ class BufferCache { static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX; static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX; static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; + static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS; static constexpr BufferId NULL_BUFFER_ID{0}; @@ -96,6 +103,10 @@ class BufferCache { BufferId buffer_id; }; + struct TextureBufferBinding : Binding { + PixelFormat format; + }; + static constexpr Binding NULL_BINDING{ .cpu_addr = 0, .size = 0, @@ -133,20 +144,31 @@ public: void BindHostComputeBuffers(); - void SetEnabledUniformBuffers(size_t stage, u32 enabled); + void SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask, + const UniformBufferSizes* sizes); - void SetEnabledComputeUniformBuffers(u32 enabled); + void SetComputeUniformBufferState(u32 mask, const ComputeUniformBufferSizes* sizes); void UnbindGraphicsStorageBuffers(size_t stage); void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, bool is_written); + void UnbindGraphicsTextureBuffers(size_t stage); + + void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size, + PixelFormat format, bool is_written, bool is_image); + void UnbindComputeStorageBuffers(); void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, bool is_written); + void UnbindComputeTextureBuffers(); + + void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, + bool is_written, bool 
is_image); + void FlushCachedWrites(); /// Return true when there are uncommitted buffers to be downloaded @@ -178,6 +200,7 @@ public: [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); std::mutex mutex; + Runtime& runtime; private: template <typename Func> @@ -254,12 +277,16 @@ private: void BindHostGraphicsStorageBuffers(size_t stage); + void BindHostGraphicsTextureBuffers(size_t stage); + void BindHostTransformFeedbackBuffers(); void BindHostComputeUniformBuffers(); void BindHostComputeStorageBuffers(); + void BindHostComputeTextureBuffers(); + void DoUpdateGraphicsBuffers(bool is_indexed); void DoUpdateComputeBuffers(); @@ -274,6 +301,8 @@ private: void UpdateStorageBuffers(size_t stage); + void UpdateTextureBuffers(size_t stage); + void UpdateTransformFeedbackBuffers(); void UpdateTransformFeedbackBuffer(u32 index); @@ -282,6 +311,8 @@ private: void UpdateComputeStorageBuffers(); + void UpdateComputeTextureBuffers(); + void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size); [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size); @@ -323,6 +354,9 @@ private: [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const; + [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, + PixelFormat format); + [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size); [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity); @@ -336,7 +370,6 @@ private: Tegra::Engines::KeplerCompute& kepler_compute; Tegra::MemoryManager& gpu_memory; Core::Memory::Memory& cpu_memory; - Runtime& runtime; SlotVector<Buffer> slot_buffers; DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; @@ -347,20 +380,30 @@ private: std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers; std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers; std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers; + std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers; std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers; std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers; std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers; + std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers; + + std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{}; + u32 enabled_compute_uniform_buffer_mask = 0; - std::array<u32, NUM_STAGES> enabled_uniform_buffers{}; - u32 enabled_compute_uniform_buffers = 0; + const UniformBufferSizes* uniform_buffer_sizes{}; + const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{}; std::array<u32, NUM_STAGES> enabled_storage_buffers{}; std::array<u32, NUM_STAGES> written_storage_buffers{}; u32 enabled_compute_storage_buffers = 0; u32 written_compute_storage_buffers = 0; - std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{}; + std::array<u32, NUM_STAGES> enabled_texture_buffers{}; + std::array<u32, NUM_STAGES> written_texture_buffers{}; + std::array<u32, NUM_STAGES> image_texture_buffers{}; + u32 enabled_compute_texture_buffers = 0; + u32 written_compute_texture_buffers = 0; + u32 image_compute_texture_buffers = 0; std::array<u32, 16> uniform_cache_hits{}; std::array<u32, 16> uniform_cache_shots{}; @@ -371,6 +414,10 @@ private: std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty> dirty_uniform_buffers{}; + std::conditional_t<IS_OPENGL, std::array<u32, NUM_STAGES>, Empty> 
fast_bound_uniform_buffers{}; + std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, + std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>, Empty> + uniform_buffer_binding_sizes{}; std::vector<BufferId> cached_write_buffer_ids; @@ -394,8 +441,8 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, Runtime& runtime_) - : rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, - gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} { + : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, + kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} { // Ensure the first slot is used for the null buffer void(slot_buffers.insert(runtime, NullBufferParams{})); deletion_iterator = slot_buffers.end(); @@ -615,6 +662,7 @@ void BufferCache<P>::BindHostStageBuffers(size_t stage) { MICROPROFILE_SCOPE(GPU_BindUploadBuffers); BindHostGraphicsUniformBuffers(stage); BindHostGraphicsStorageBuffers(stage); + BindHostGraphicsTextureBuffers(stage); } template <class P> @@ -622,21 +670,30 @@ void BufferCache<P>::BindHostComputeBuffers() { MICROPROFILE_SCOPE(GPU_BindUploadBuffers); BindHostComputeUniformBuffers(); BindHostComputeStorageBuffers(); + BindHostComputeTextureBuffers(); } template <class P> -void BufferCache<P>::SetEnabledUniformBuffers(size_t stage, u32 enabled) { +void BufferCache<P>::SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask, + const UniformBufferSizes* sizes) { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { - if (enabled_uniform_buffers[stage] != enabled) { - dirty_uniform_buffers[stage] = ~u32{0}; + if (enabled_uniform_buffer_masks != mask) { + if constexpr (IS_OPENGL) { + fast_bound_uniform_buffers.fill(0); + } + dirty_uniform_buffers.fill(~u32{0}); + uniform_buffer_binding_sizes.fill({}); } } - enabled_uniform_buffers[stage] = enabled; + enabled_uniform_buffer_masks = mask; + uniform_buffer_sizes = sizes; } template <class P> -void BufferCache<P>::SetEnabledComputeUniformBuffers(u32 enabled) { - enabled_compute_uniform_buffers = enabled; +void BufferCache<P>::SetComputeUniformBufferState(u32 mask, + const ComputeUniformBufferSizes* sizes) { + enabled_compute_uniform_buffer_mask = mask; + compute_uniform_buffer_sizes = sizes; } template <class P> @@ -657,9 +714,29 @@ void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, } template <class P> +void BufferCache<P>::UnbindGraphicsTextureBuffers(size_t stage) { + enabled_texture_buffers[stage] = 0; + written_texture_buffers[stage] = 0; + image_texture_buffers[stage] = 0; +} + +template <class P> +void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, + u32 size, PixelFormat format, bool is_written, + bool is_image) { + enabled_texture_buffers[stage] |= 1U << tbo_index; + written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + image_texture_buffers[stage] |= (is_image ? 
1U : 0U) << tbo_index; + } + texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); +} + +template <class P> void BufferCache<P>::UnbindComputeStorageBuffers() { enabled_compute_storage_buffers = 0; written_compute_storage_buffers = 0; + image_compute_texture_buffers = 0; } template <class P> @@ -677,6 +754,24 @@ void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, } template <class P> +void BufferCache<P>::UnbindComputeTextureBuffers() { + enabled_compute_texture_buffers = 0; + written_compute_texture_buffers = 0; + image_compute_texture_buffers = 0; +} + +template <class P> +void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, + PixelFormat format, bool is_written, bool is_image) { + enabled_compute_texture_buffers |= 1U << tbo_index; + written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index; + } + compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); +} + +template <class P> void BufferCache<P>::FlushCachedWrites() { for (const BufferId buffer_id : cached_write_buffer_ids) { slot_buffers[buffer_id].FlushCachedWrites(); @@ -901,7 +996,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) { dirty = std::exchange(dirty_uniform_buffers[stage], 0); } u32 binding_index = 0; - ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) { + ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) { const bool needs_bind = ((dirty >> index) & 1) != 0; BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind); if constexpr (NEEDS_BIND_UNIFORM_INDEX) { @@ -915,7 +1010,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 bool needs_bind) { const Binding& binding = uniform_buffers[stage][index]; const VAddr cpu_addr = binding.cpu_addr; - const u32 size = binding.size; + const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]); Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer); const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && @@ -925,8 +1020,13 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 if constexpr (IS_OPENGL) { if (runtime.HasFastBufferSubData()) { // Fast path for Nvidia - if (!HasFastUniformBufferBound(stage, binding_index)) { + const bool should_fast_bind = + !HasFastUniformBufferBound(stage, binding_index) || + uniform_buffer_binding_sizes[stage][binding_index] != size; + if (should_fast_bind) { // We only have to bind when the currently bound buffer is not the fast version + fast_bound_uniform_buffers[stage] |= 1U << binding_index; + uniform_buffer_binding_sizes[stage][binding_index] = size; runtime.BindFastUniformBuffer(stage, binding_index, size); } const auto span = ImmediateBufferWithData(cpu_addr, size); @@ -934,8 +1034,10 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 return; } } - fast_bound_uniform_buffers[stage] |= 1U << binding_index; - + if constexpr (IS_OPENGL) { + fast_bound_uniform_buffers[stage] |= 1U << binding_index; + uniform_buffer_binding_sizes[stage][binding_index] = size; + } // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size); cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size); @@ 
-948,14 +1050,27 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 } ++uniform_cache_shots[0]; - if (!needs_bind && !HasFastUniformBufferBound(stage, binding_index)) { - // Skip binding if it's not needed and if the bound buffer is not the fast version - // This exists to avoid instances where the fast buffer is bound and a GPU write happens + // Skip binding if it's not needed and if the bound buffer is not the fast version + // This exists to avoid instances where the fast buffer is bound and a GPU write happens + needs_bind |= HasFastUniformBufferBound(stage, binding_index); + if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { + needs_bind |= uniform_buffer_binding_sizes[stage][binding_index] != size; + } + if (!needs_bind) { return; } - fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); - const u32 offset = buffer.Offset(cpu_addr); + if constexpr (IS_OPENGL) { + // Fast buffer will be unbound + fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); + + // Mark the index as dirty if offset doesn't match + const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset(); + dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index; + } + if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { + uniform_buffer_binding_sizes[stage][binding_index] = size; + } if constexpr (NEEDS_BIND_UNIFORM_INDEX) { runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size); } else { @@ -985,6 +1100,28 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) { } template <class P> +void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) { + ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { + const TextureBufferBinding& binding = texture_buffers[stage][index]; + Buffer& buffer = slot_buffers[binding.buffer_id]; + const u32 size = binding.size; + SynchronizeBuffer(buffer, binding.cpu_addr, size); + + const u32 offset = buffer.Offset(binding.cpu_addr); + const PixelFormat format = binding.format; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + if (((image_texture_buffers[stage] >> index) & 1) != 0) { + runtime.BindImageBuffer(buffer, offset, size, format); + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } + }); +} + +template <class P> void BufferCache<P>::BindHostTransformFeedbackBuffers() { if (maxwell3d.regs.tfb_enabled == 0) { return; @@ -1006,13 +1143,14 @@ void BufferCache<P>::BindHostComputeUniformBuffers() { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { // Mark all uniform buffers as dirty dirty_uniform_buffers.fill(~u32{0}); + fast_bound_uniform_buffers.fill(0); } u32 binding_index = 0; - ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { + ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { const Binding& binding = compute_uniform_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer); - const u32 size = binding.size; + const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]); SynchronizeBuffer(buffer, binding.cpu_addr, size); const u32 offset = buffer.Offset(binding.cpu_addr); @@ -1047,6 +1185,28 @@ void BufferCache<P>::BindHostComputeStorageBuffers() { } template <class P> +void BufferCache<P>::BindHostComputeTextureBuffers() { + ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { + const TextureBufferBinding& binding = compute_texture_buffers[index]; + 
Buffer& buffer = slot_buffers[binding.buffer_id]; + const u32 size = binding.size; + SynchronizeBuffer(buffer, binding.cpu_addr, size); + + const u32 offset = buffer.Offset(binding.cpu_addr); + const PixelFormat format = binding.format; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + if (((image_compute_texture_buffers >> index) & 1) != 0) { + runtime.BindImageBuffer(buffer, offset, size, format); + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } + }); +} + +template <class P> void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) { if (is_indexed) { UpdateIndexBuffer(); @@ -1056,6 +1216,7 @@ void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) { for (size_t stage = 0; stage < NUM_STAGES; ++stage) { UpdateUniformBuffers(stage); UpdateStorageBuffers(stage); + UpdateTextureBuffers(stage); } } @@ -1063,6 +1224,7 @@ template <class P> void BufferCache<P>::DoUpdateComputeBuffers() { UpdateComputeUniformBuffers(); UpdateComputeStorageBuffers(); + UpdateComputeTextureBuffers(); } template <class P> @@ -1132,7 +1294,7 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) { template <class P> void BufferCache<P>::UpdateUniformBuffers(size_t stage) { - ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) { + ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) { Binding& binding = uniform_buffers[stage][index]; if (binding.buffer_id) { // Already updated @@ -1163,6 +1325,18 @@ void BufferCache<P>::UpdateStorageBuffers(size_t stage) { } template <class P> +void BufferCache<P>::UpdateTextureBuffers(size_t stage) { + ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { + Binding& binding = texture_buffers[stage][index]; + binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); + // Mark buffer as written if needed + if (((written_texture_buffers[stage] >> index) & 1) != 0) { + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); + } + }); +} + +template <class P> void BufferCache<P>::UpdateTransformFeedbackBuffers() { if (maxwell3d.regs.tfb_enabled == 0) { return; @@ -1193,7 +1367,7 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) { template <class P> void BufferCache<P>::UpdateComputeUniformBuffers() { - ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { + ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { Binding& binding = compute_uniform_buffers[index]; binding = NULL_BINDING; const auto& launch_desc = kepler_compute.launch_description; @@ -1214,11 +1388,22 @@ void BufferCache<P>::UpdateComputeStorageBuffers() { ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { // Resolve buffer Binding& binding = compute_storage_buffers[index]; - const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size); - binding.buffer_id = buffer_id; + binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); // Mark as written if needed if (((written_compute_storage_buffers >> index) & 1) != 0) { - MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size); + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); + } + }); +} + +template <class P> +void BufferCache<P>::UpdateComputeTextureBuffers() { + ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { + Binding& binding = compute_texture_buffers[index]; + binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); + // Mark as written if needed + if 
(((written_compute_texture_buffers >> index) & 1) != 0) { + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); } }); } @@ -1551,6 +1736,7 @@ template <class P> void BufferCache<P>::NotifyBufferDeletion() { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { dirty_uniform_buffers.fill(~u32{0}); + uniform_buffer_binding_sizes.fill({}); } auto& flags = maxwell3d.dirty.flags; flags[Dirty::IndexBuffer] = true; @@ -1578,6 +1764,25 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s } template <class P> +typename BufferCache<P>::TextureBufferBinding BufferCache<P>::GetTextureBufferBinding( + GPUVAddr gpu_addr, u32 size, PixelFormat format) { + const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + TextureBufferBinding binding; + if (!cpu_addr || size == 0) { + binding.cpu_addr = 0; + binding.size = 0; + binding.buffer_id = NULL_BUFFER_ID; + binding.format = PixelFormat::Invalid; + } else { + binding.cpu_addr = *cpu_addr; + binding.size = size; + binding.buffer_id = BufferId{}; + binding.format = format; + } + return binding; +} + +template <class P> std::span<const u8> BufferCache<P>::ImmediateBufferWithData(VAddr cpu_addr, size_t size) { u8* const base_pointer = cpu_memory.GetPointer(cpu_addr); if (IsRangeGranular(cpu_addr, size) || diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp index 7149af290..b1be065c3 100644 --- a/src/video_core/dirty_flags.cpp +++ b/src/video_core/dirty_flags.cpp @@ -58,6 +58,11 @@ void SetupDirtyRenderTargets(Maxwell3D::DirtyState::Tables& tables) { FillBlock(table, OFF(zeta), NUM(zeta), flag); } } + +void SetupDirtyShaders(Maxwell3D::DirtyState::Tables& tables) { + FillBlock(tables[0], OFF(shader_config[0]), + NUM(shader_config[0]) * Maxwell3D::Regs::MaxShaderProgram, Shaders); +} } // Anonymous namespace void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) { @@ -65,6 +70,7 @@ void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) { SetupIndexBuffer(tables); SetupDirtyDescriptors(tables); SetupDirtyRenderTargets(tables); + SetupDirtyShaders(tables); } } // namespace VideoCommon::Dirty diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h index 702688ace..504465d3f 100644 --- a/src/video_core/dirty_flags.h +++ b/src/video_core/dirty_flags.h @@ -36,6 +36,8 @@ enum : u8 { IndexBuffer, + Shaders, + LastCommonEntry, }; diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h deleted file mode 100644 index f46e81bb7..000000000 --- a/src/video_core/engines/const_buffer_engine_interface.h +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include <type_traits> -#include "common/bit_field.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/engines/shader_type.h" -#include "video_core/guest_driver.h" -#include "video_core/textures/texture.h" - -namespace Tegra::Engines { - -struct SamplerDescriptor { - union { - u32 raw = 0; - BitField<0, 2, Tegra::Shader::TextureType> texture_type; - BitField<2, 3, Tegra::Texture::ComponentType> r_type; - BitField<5, 1, u32> is_array; - BitField<6, 1, u32> is_buffer; - BitField<7, 1, u32> is_shadow; - BitField<8, 3, Tegra::Texture::ComponentType> g_type; - BitField<11, 3, Tegra::Texture::ComponentType> b_type; - BitField<14, 3, Tegra::Texture::ComponentType> a_type; - BitField<17, 7, Tegra::Texture::TextureFormat> format; - }; - - bool operator==(const SamplerDescriptor& rhs) const noexcept { - return raw == rhs.raw; - } - - bool operator!=(const SamplerDescriptor& rhs) const noexcept { - return !operator==(rhs); - } - - static SamplerDescriptor FromTIC(const Tegra::Texture::TICEntry& tic) { - using Tegra::Shader::TextureType; - SamplerDescriptor result; - - result.format.Assign(tic.format.Value()); - result.r_type.Assign(tic.r_type.Value()); - result.g_type.Assign(tic.g_type.Value()); - result.b_type.Assign(tic.b_type.Value()); - result.a_type.Assign(tic.a_type.Value()); - - switch (tic.texture_type.Value()) { - case Tegra::Texture::TextureType::Texture1D: - result.texture_type.Assign(TextureType::Texture1D); - return result; - case Tegra::Texture::TextureType::Texture2D: - result.texture_type.Assign(TextureType::Texture2D); - return result; - case Tegra::Texture::TextureType::Texture3D: - result.texture_type.Assign(TextureType::Texture3D); - return result; - case Tegra::Texture::TextureType::TextureCubemap: - result.texture_type.Assign(TextureType::TextureCube); - return result; - case Tegra::Texture::TextureType::Texture1DArray: - result.texture_type.Assign(TextureType::Texture1D); - result.is_array.Assign(1); - return result; - case Tegra::Texture::TextureType::Texture2DArray: - result.texture_type.Assign(TextureType::Texture2D); - result.is_array.Assign(1); - return result; - case Tegra::Texture::TextureType::Texture1DBuffer: - result.texture_type.Assign(TextureType::Texture1D); - result.is_buffer.Assign(1); - return result; - case Tegra::Texture::TextureType::Texture2DNoMipmap: - result.texture_type.Assign(TextureType::Texture2D); - return result; - case Tegra::Texture::TextureType::TextureCubeArray: - result.texture_type.Assign(TextureType::TextureCube); - result.is_array.Assign(1); - return result; - default: - result.texture_type.Assign(TextureType::Texture2D); - return result; - } - } -}; -static_assert(std::is_trivially_copyable_v<SamplerDescriptor>); - -class ConstBufferEngineInterface { -public: - virtual ~ConstBufferEngineInterface() = default; - virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0; - virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0; - virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const = 0; - virtual SamplerDescriptor AccessSampler(u32 handle) const = 0; - virtual u32 GetBoundBuffer() const = 0; - - virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0; - virtual const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const = 0; -}; - -} // namespace Tegra::Engines diff --git a/src/video_core/engines/kepler_compute.cpp 
b/src/video_core/engines/kepler_compute.cpp index a9b75091e..492b4c5a3 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -8,7 +8,6 @@ #include "core/core.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_base.h" @@ -57,53 +56,11 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun } } -u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { - ASSERT(stage == ShaderType::Compute); - const auto& buffer = launch_description.const_buffer_config[const_buffer]; - u32 result; - std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32)); - return result; -} - -SamplerDescriptor KeplerCompute::AccessBoundSampler(ShaderType stage, u64 offset) const { - return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); -} - -SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const { - ASSERT(stage == ShaderType::Compute); - const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer]; - const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset; - return AccessSampler(memory_manager.Read<u32>(tex_info_address)); -} - -SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const { - const Texture::TextureHandle tex_handle{handle}; - const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); - const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); - - SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); - result.is_shadow.Assign(tsc.depth_compare_enabled.Value()); - return result; -} - -VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() { - return rasterizer->AccessGuestDriverProfile(); -} - -const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const { - return rasterizer->AccessGuestDriverProfile(); -} - void KeplerCompute::ProcessLaunch() { const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); - - const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start; - LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr); - - rasterizer->DispatchCompute(code_addr); + rasterizer->DispatchCompute(); } Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const { diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 7c40cba38..f8b8d06ac 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -10,10 +10,8 @@ #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" -#include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/engine_interface.h" #include "video_core/engines/engine_upload.h" -#include "video_core/engines/shader_type.h" #include "video_core/gpu.h" #include "video_core/textures/texture.h" @@ -40,7 +38,7 @@ namespace Tegra::Engines { #define KEPLER_COMPUTE_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) -class KeplerCompute final : public ConstBufferEngineInterface, public 
EngineInterface { +class KeplerCompute final : public EngineInterface { public: explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager); ~KeplerCompute(); @@ -209,23 +207,6 @@ public: void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) override; - u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; - - SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; - - SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const override; - - SamplerDescriptor AccessSampler(u32 handle) const override; - - u32 GetBoundBuffer() const override { - return regs.tex_cb_index; - } - - VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; - - const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; - private: void ProcessLaunch(); diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index aab6b8f7a..b18b8a02a 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -8,7 +8,6 @@ #include "core/core.h" #include "core/core_timing.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" @@ -670,42 +669,4 @@ void Maxwell3D::ProcessClearBuffers() { rasterizer->Clear(); } -u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { - ASSERT(stage != ShaderType::Compute); - const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)]; - const auto& buffer = shader_stage.const_buffers[const_buffer]; - return memory_manager.Read<u32>(buffer.address + offset); -} - -SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const { - return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); -} - -SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const { - ASSERT(stage != ShaderType::Compute); - const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; - const auto& tex_info_buffer = shader.const_buffers[const_buffer]; - const GPUVAddr tex_info_address = tex_info_buffer.address + offset; - return AccessSampler(memory_manager.Read<u32>(tex_info_address)); -} - -SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const { - const Texture::TextureHandle tex_handle{handle}; - const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); - const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); - - SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); - result.is_shadow.Assign(tsc.depth_compare_enabled.Value()); - return result; -} - -VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() { - return rasterizer->AccessGuestDriverProfile(); -} - -const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const { - return rasterizer->AccessGuestDriverProfile(); -} - } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 335383955..1aa43523a 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -17,11 +17,9 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "common/math_util.h" -#include "video_core/engines/const_buffer_engine_interface.h" #include 
"video_core/engines/const_buffer_info.h" #include "video_core/engines/engine_interface.h" #include "video_core/engines/engine_upload.h" -#include "video_core/engines/shader_type.h" #include "video_core/gpu.h" #include "video_core/macro/macro.h" #include "video_core/textures/texture.h" @@ -49,7 +47,7 @@ namespace Tegra::Engines { #define MAXWELL3D_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) -class Maxwell3D final : public ConstBufferEngineInterface, public EngineInterface { +class Maxwell3D final : public EngineInterface { public: explicit Maxwell3D(Core::System& system, MemoryManager& memory_manager); ~Maxwell3D(); @@ -307,10 +305,6 @@ public: return (type == Type::SignedNorm) || (type == Type::UnsignedNorm); } - bool IsConstant() const { - return constant; - } - bool IsValid() const { return size != Size::Invalid; } @@ -912,7 +906,11 @@ public: u32 fill_rectangle; - INSERT_PADDING_WORDS_NOINIT(0x8); + INSERT_PADDING_WORDS_NOINIT(0x2); + + u32 conservative_raster_enable; + + INSERT_PADDING_WORDS_NOINIT(0x5); std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format; @@ -959,7 +957,11 @@ public: SamplerIndex sampler_index; - INSERT_PADDING_WORDS_NOINIT(0x25); + INSERT_PADDING_WORDS_NOINIT(0x2); + + std::array<u32, 8> gp_passthrough_mask; + + INSERT_PADDING_WORDS_NOINIT(0x1B); u32 depth_test_enable; @@ -1152,7 +1154,11 @@ public: u32 index; } primitive_restart; - INSERT_PADDING_WORDS_NOINIT(0x5F); + INSERT_PADDING_WORDS_NOINIT(0xE); + + u32 provoking_vertex_last; + + INSERT_PADDING_WORDS_NOINIT(0x50); struct { u32 start_addr_high; @@ -1424,23 +1430,6 @@ public: void FlushMMEInlineDraw(); - u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; - - SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; - - SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const override; - - SamplerDescriptor AccessSampler(u32 handle) const override; - - u32 GetBoundBuffer() const override { - return regs.tex_cb_index; - } - - VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; - - const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; - bool ShouldExecute() const { return execute_on; } @@ -1630,6 +1619,7 @@ ASSERT_REG_POSITION(zeta, 0x3F8); ASSERT_REG_POSITION(render_area, 0x3FD); ASSERT_REG_POSITION(clear_flags, 0x43E); ASSERT_REG_POSITION(fill_rectangle, 0x44F); +ASSERT_REG_POSITION(conservative_raster_enable, 0x452); ASSERT_REG_POSITION(vertex_attrib_format, 0x458); ASSERT_REG_POSITION(multisample_sample_locations, 0x478); ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E); @@ -1638,6 +1628,7 @@ ASSERT_REG_POSITION(zeta_width, 0x48a); ASSERT_REG_POSITION(zeta_height, 0x48b); ASSERT_REG_POSITION(zeta_depth, 0x48c); ASSERT_REG_POSITION(sampler_index, 0x48D); +ASSERT_REG_POSITION(gp_passthrough_mask, 0x490); ASSERT_REG_POSITION(depth_test_enable, 0x4B3); ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); @@ -1690,6 +1681,7 @@ ASSERT_REG_POSITION(point_coord_replace, 0x581); ASSERT_REG_POSITION(code_address, 0x582); ASSERT_REG_POSITION(draw, 0x585); ASSERT_REG_POSITION(primitive_restart, 0x591); +ASSERT_REG_POSITION(provoking_vertex_last, 0x5A1); ASSERT_REG_POSITION(index_array, 0x5F2); ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F); ASSERT_REG_POSITION(instanced_arrays, 0x620); diff --git a/src/video_core/engines/maxwell_dma.cpp 
b/src/video_core/engines/maxwell_dma.cpp index c51776466..c7ec1eac9 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -127,7 +127,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() { // Optimized path for micro copies. const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; - if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X) { + if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X && + regs.src_params.height > GOB_SIZE_Y) { FastCopyBlockLinearToPitch(); return; } diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h deleted file mode 100644 index 8b45f1b62..000000000 --- a/src/video_core/engines/shader_bytecode.h +++ /dev/null @@ -1,2298 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <array> -#include <bitset> -#include <optional> -#include <tuple> -#include <vector> - -#include "common/assert.h" -#include "common/bit_field.h" -#include "common/common_types.h" - -namespace Tegra::Shader { - -struct Register { - /// Number of registers - static constexpr std::size_t NumRegisters = 256; - - /// Register 255 is special cased to always be 0 - static constexpr std::size_t ZeroIndex = 255; - - enum class Size : u64 { - Byte = 0, - Short = 1, - Word = 2, - Long = 3, - }; - - constexpr Register() = default; - - constexpr Register(u64 value_) : value(value_) {} - - [[nodiscard]] constexpr operator u64() const { - return value; - } - - template <typename T> - [[nodiscard]] constexpr u64 operator-(const T& oth) const { - return value - oth; - } - - template <typename T> - [[nodiscard]] constexpr u64 operator&(const T& oth) const { - return value & oth; - } - - [[nodiscard]] constexpr u64 operator&(const Register& oth) const { - return value & oth.value; - } - - [[nodiscard]] constexpr u64 operator~() const { - return ~value; - } - - [[nodiscard]] u64 GetSwizzledIndex(u64 elem) const { - elem = (value + elem) & 3; - return (value & ~3) + elem; - } - -private: - u64 value{}; -}; - -enum class AttributeSize : u64 { - Word = 0, - DoubleWord = 1, - TripleWord = 2, - QuadWord = 3, -}; - -union Attribute { - Attribute() = default; - - constexpr explicit Attribute(u64 value_) : value(value_) {} - - enum class Index : u64 { - LayerViewportPointSize = 6, - Position = 7, - Attribute_0 = 8, - Attribute_31 = 39, - FrontColor = 40, - FrontSecondaryColor = 41, - BackColor = 42, - BackSecondaryColor = 43, - ClipDistances0123 = 44, - ClipDistances4567 = 45, - PointCoord = 46, - // This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex - // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval - // shader. - TessCoordInstanceIDVertexID = 47, - TexCoord_0 = 48, - TexCoord_7 = 55, - // This attribute contains a tuple of (Unk, Unk, Unk, gl_FrontFacing) when inside a fragment - // shader. It is unknown what the other values contain. 
- FrontFacing = 63, - }; - - union { - BitField<20, 10, u64> immediate; - BitField<22, 2, u64> element; - BitField<24, 6, Index> index; - BitField<31, 1, u64> patch; - BitField<47, 3, AttributeSize> size; - - [[nodiscard]] bool IsPhysical() const { - return patch == 0 && element == 0 && static_cast<u64>(index.Value()) == 0; - } - } fmt20; - - union { - BitField<30, 2, u64> element; - BitField<32, 6, Index> index; - } fmt28; - - BitField<39, 8, u64> reg; - u64 value{}; -}; - -union Sampler { - Sampler() = default; - - constexpr explicit Sampler(u64 value_) : value(value_) {} - - enum class Index : u64 { - Sampler_0 = 8, - }; - - BitField<36, 13, Index> index; - u64 value{}; -}; - -union Image { - Image() = default; - - constexpr explicit Image(u64 value_) : value{value_} {} - - BitField<36, 13, u64> index; - u64 value; -}; - -} // namespace Tegra::Shader - -namespace std { - -// TODO(bunnei): The below is forbidden by the C++ standard, but works fine. See #330. -template <> -struct make_unsigned<Tegra::Shader::Attribute> { - using type = Tegra::Shader::Attribute; -}; - -template <> -struct make_unsigned<Tegra::Shader::Register> { - using type = Tegra::Shader::Register; -}; - -} // namespace std - -namespace Tegra::Shader { - -enum class Pred : u64 { - UnusedIndex = 0x7, - NeverExecute = 0xF, -}; - -enum class PredCondition : u64 { - F = 0, // Always false - LT = 1, // Ordered less than - EQ = 2, // Ordered equal - LE = 3, // Ordered less than or equal - GT = 4, // Ordered greater than - NE = 5, // Ordered not equal - GE = 6, // Ordered greater than or equal - NUM = 7, // Ordered - NAN_ = 8, // Unordered - LTU = 9, // Unordered less than - EQU = 10, // Unordered equal - LEU = 11, // Unordered less than or equal - GTU = 12, // Unordered greater than - NEU = 13, // Unordered not equal - GEU = 14, // Unordered greater than or equal - T = 15, // Always true -}; - -enum class PredOperation : u64 { - And = 0, - Or = 1, - Xor = 2, -}; - -enum class LogicOperation : u64 { - And = 0, - Or = 1, - Xor = 2, - PassB = 3, -}; - -enum class SubOp : u64 { - Cos = 0x0, - Sin = 0x1, - Ex2 = 0x2, - Lg2 = 0x3, - Rcp = 0x4, - Rsq = 0x5, - Sqrt = 0x8, -}; - -enum class F2iRoundingOp : u64 { - RoundEven = 0, - Floor = 1, - Ceil = 2, - Trunc = 3, -}; - -enum class F2fRoundingOp : u64 { - None = 0, - Pass = 3, - Round = 8, - Floor = 9, - Ceil = 10, - Trunc = 11, -}; - -enum class AtomicOp : u64 { - Add = 0, - Min = 1, - Max = 2, - Inc = 3, - Dec = 4, - And = 5, - Or = 6, - Xor = 7, - Exch = 8, - SafeAdd = 10, -}; - -enum class GlobalAtomicType : u64 { - U32 = 0, - S32 = 1, - U64 = 2, - F32_FTZ_RN = 3, - F16x2_FTZ_RN = 4, - S64 = 5, -}; - -enum class UniformType : u64 { - UnsignedByte = 0, - SignedByte = 1, - UnsignedShort = 2, - SignedShort = 3, - Single = 4, - Double = 5, - Quad = 6, - UnsignedQuad = 7, -}; - -enum class StoreType : u64 { - Unsigned8 = 0, - Signed8 = 1, - Unsigned16 = 2, - Signed16 = 3, - Bits32 = 4, - Bits64 = 5, - Bits128 = 6, -}; - -enum class AtomicType : u64 { - U32 = 0, - S32 = 1, - U64 = 2, - S64 = 3, -}; - -enum class IMinMaxExchange : u64 { - None = 0, - XLo = 1, - XMed = 2, - XHi = 3, -}; - -enum class VideoType : u64 { - Size16_Low = 0, - Size16_High = 1, - Size32 = 2, - Invalid = 3, -}; - -enum class VmadShr : u64 { - Shr7 = 1, - Shr15 = 2, -}; - -enum class VmnmxType : u64 { - Bits8, - Bits16, - Bits32, -}; - -enum class VmnmxOperation : u64 { - Mrg_16H = 0, - Mrg_16L = 1, - Mrg_8B0 = 2, - Mrg_8B2 = 3, - Acc = 4, - Min = 5, - Max = 6, - Nop = 7, -}; - -enum class XmadMode : u64 { - 
None = 0, - CLo = 1, - CHi = 2, - CSfu = 3, - CBcc = 4, -}; - -enum class IAdd3Mode : u64 { - None = 0, - RightShift = 1, - LeftShift = 2, -}; - -enum class IAdd3Height : u64 { - None = 0, - LowerHalfWord = 1, - UpperHalfWord = 2, -}; - -enum class FlowCondition : u64 { - Always = 0xF, - Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for? -}; - -enum class ConditionCode : u64 { - F = 0, - LT = 1, - EQ = 2, - LE = 3, - GT = 4, - NE = 5, - GE = 6, - Num = 7, - Nan = 8, - LTU = 9, - EQU = 10, - LEU = 11, - GTU = 12, - NEU = 13, - GEU = 14, - T = 15, - OFF = 16, - LO = 17, - SFF = 18, - LS = 19, - HI = 20, - SFT = 21, - HS = 22, - OFT = 23, - CSM_TA = 24, - CSM_TR = 25, - CSM_MX = 26, - FCSM_TA = 27, - FCSM_TR = 28, - FCSM_MX = 29, - RLE = 30, - RGT = 31, -}; - -enum class PredicateResultMode : u64 { - None = 0x0, - NotZero = 0x3, -}; - -enum class TextureType : u64 { - Texture1D = 0, - Texture2D = 1, - Texture3D = 2, - TextureCube = 3, -}; - -enum class TextureQueryType : u64 { - Dimension = 1, - TextureType = 2, - SamplePosition = 5, - Filter = 16, - LevelOfDetail = 18, - Wrap = 20, - BorderColor = 22, -}; - -enum class TextureProcessMode : u64 { - None = 0, - LZ = 1, // Load LOD of zero. - LB = 2, // Load Bias. - LL = 3, // Load LOD. - LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB. - LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL. -}; - -enum class TextureMiscMode : u64 { - DC, - AOFFI, // Uses Offset - NDV, - NODEP, - MZ, - PTP, -}; - -enum class SurfaceDataMode : u64 { - P = 0, - D_BA = 1, -}; - -enum class OutOfBoundsStore : u64 { - Ignore = 0, - Clamp = 1, - Trap = 2, -}; - -enum class ImageType : u64 { - Texture1D = 0, - TextureBuffer = 1, - Texture1DArray = 2, - Texture2D = 3, - Texture2DArray = 4, - Texture3D = 5, -}; - -enum class IsberdMode : u64 { - None = 0, - Patch = 1, - Prim = 2, - Attr = 3, -}; - -enum class IsberdShift : u64 { None = 0, U16 = 1, B32 = 2 }; - -enum class MembarType : u64 { - CTA = 0, - GL = 1, - SYS = 2, - VC = 3, -}; - -enum class MembarUnknown : u64 { Default = 0, IVALLD = 1, IVALLT = 2, IVALLTD = 3 }; - -enum class HalfType : u64 { - H0_H1 = 0, - F32 = 1, - H0_H0 = 2, - H1_H1 = 3, -}; - -enum class HalfMerge : u64 { - H0_H1 = 0, - F32 = 1, - Mrg_H0 = 2, - Mrg_H1 = 3, -}; - -enum class HalfPrecision : u64 { - None = 0, - FTZ = 1, - FMZ = 2, -}; - -enum class R2pMode : u64 { - Pr = 0, - Cc = 1, -}; - -enum class IpaInterpMode : u64 { - Pass = 0, - Multiply = 1, - Constant = 2, - Sc = 3, -}; - -enum class IpaSampleMode : u64 { - Default = 0, - Centroid = 1, - Offset = 2, -}; - -enum class LmemLoadCacheManagement : u64 { - Default = 0, - LU = 1, - CI = 2, - CV = 3, -}; - -enum class StoreCacheManagement : u64 { - Default = 0, - CG = 1, - CS = 2, - WT = 3, -}; - -struct IpaMode { - IpaInterpMode interpolation_mode; - IpaSampleMode sampling_mode; - - [[nodiscard]] bool operator==(const IpaMode& a) const { - return std::tie(interpolation_mode, sampling_mode) == - std::tie(a.interpolation_mode, a.sampling_mode); - } - [[nodiscard]] bool operator!=(const IpaMode& a) const { - return !operator==(a); - } - [[nodiscard]] bool operator<(const IpaMode& a) const { - return std::tie(interpolation_mode, sampling_mode) < - std::tie(a.interpolation_mode, a.sampling_mode); - } -}; - -enum class SystemVariable : u64 { - LaneId = 0x00, - VirtCfg = 0x02, - VirtId = 0x03, - Pm0 = 0x04, - Pm1 = 0x05, - Pm2 = 0x06, - Pm3 = 0x07, - Pm4 = 0x08, - Pm5 = 0x09, - Pm6 = 0x0a, - Pm7 = 0x0b, - OrderingTicket = 0x0f, - PrimType = 
0x10, - InvocationId = 0x11, - Ydirection = 0x12, - ThreadKill = 0x13, - ShaderType = 0x14, - DirectBeWriteAddressLow = 0x15, - DirectBeWriteAddressHigh = 0x16, - DirectBeWriteEnabled = 0x17, - MachineId0 = 0x18, - MachineId1 = 0x19, - MachineId2 = 0x1a, - MachineId3 = 0x1b, - Affinity = 0x1c, - InvocationInfo = 0x1d, - WscaleFactorXY = 0x1e, - WscaleFactorZ = 0x1f, - Tid = 0x20, - TidX = 0x21, - TidY = 0x22, - TidZ = 0x23, - CtaParam = 0x24, - CtaIdX = 0x25, - CtaIdY = 0x26, - CtaIdZ = 0x27, - NtId = 0x28, - CirQueueIncrMinusOne = 0x29, - Nlatc = 0x2a, - SmSpaVersion = 0x2c, - MultiPassShaderInfo = 0x2d, - LwinHi = 0x2e, - SwinHi = 0x2f, - SwinLo = 0x30, - SwinSz = 0x31, - SmemSz = 0x32, - SmemBanks = 0x33, - LwinLo = 0x34, - LwinSz = 0x35, - LmemLosz = 0x36, - LmemHioff = 0x37, - EqMask = 0x38, - LtMask = 0x39, - LeMask = 0x3a, - GtMask = 0x3b, - GeMask = 0x3c, - RegAlloc = 0x3d, - CtxAddr = 0x3e, // .fmask = F_SM50 - BarrierAlloc = 0x3e, // .fmask = F_SM60 - GlobalErrorStatus = 0x40, - WarpErrorStatus = 0x42, - WarpErrorStatusClear = 0x43, - PmHi0 = 0x48, - PmHi1 = 0x49, - PmHi2 = 0x4a, - PmHi3 = 0x4b, - PmHi4 = 0x4c, - PmHi5 = 0x4d, - PmHi6 = 0x4e, - PmHi7 = 0x4f, - ClockLo = 0x50, - ClockHi = 0x51, - GlobalTimerLo = 0x52, - GlobalTimerHi = 0x53, - HwTaskId = 0x60, - CircularQueueEntryIndex = 0x61, - CircularQueueEntryAddressLow = 0x62, - CircularQueueEntryAddressHigh = 0x63, -}; - -enum class PhysicalAttributeDirection : u64 { - Input = 0, - Output = 1, -}; - -enum class VoteOperation : u64 { - All = 0, // allThreadsNV - Any = 1, // anyThreadNV - Eq = 2, // allThreadsEqualNV -}; - -enum class ImageAtomicOperationType : u64 { - U32 = 0, - S32 = 1, - U64 = 2, - F32 = 3, - S64 = 5, - SD32 = 6, - SD64 = 7, -}; - -enum class ImageAtomicOperation : u64 { - Add = 0, - Min = 1, - Max = 2, - Inc = 3, - Dec = 4, - And = 5, - Or = 6, - Xor = 7, - Exch = 8, -}; - -enum class ShuffleOperation : u64 { - Idx = 0, // shuffleNV - Up = 1, // shuffleUpNV - Down = 2, // shuffleDownNV - Bfly = 3, // shuffleXorNV -}; - -enum class ShfType : u64 { - Bits32 = 0, - U64 = 2, - S64 = 3, -}; - -enum class ShfXmode : u64 { - None = 0, - HI = 1, - X = 2, - XHI = 3, -}; - -union Instruction { - constexpr Instruction& operator=(const Instruction& instr) { - value = instr.value; - return *this; - } - - constexpr Instruction(u64 value_) : value{value_} {} - constexpr Instruction(const Instruction& instr) : value(instr.value) {} - - [[nodiscard]] constexpr bool Bit(u64 offset) const { - return ((value >> offset) & 1) != 0; - } - - BitField<0, 8, Register> gpr0; - BitField<8, 8, Register> gpr8; - union { - BitField<16, 4, Pred> full_pred; - BitField<16, 3, u64> pred_index; - } pred; - BitField<19, 1, u64> negate_pred; - BitField<20, 8, Register> gpr20; - BitField<20, 4, SubOp> sub_op; - BitField<28, 8, Register> gpr28; - BitField<39, 8, Register> gpr39; - BitField<48, 16, u64> opcode; - - union { - BitField<8, 5, ConditionCode> cc; - BitField<13, 1, u64> trigger; - } nop; - - union { - BitField<48, 2, VoteOperation> operation; - BitField<45, 3, u64> dest_pred; - BitField<39, 3, u64> value; - BitField<42, 1, u64> negate_value; - } vote; - - union { - BitField<30, 2, ShuffleOperation> operation; - BitField<48, 3, u64> pred48; - BitField<28, 1, u64> is_index_imm; - BitField<29, 1, u64> is_mask_imm; - BitField<20, 5, u64> index_imm; - BitField<34, 13, u64> mask_imm; - } shfl; - - union { - BitField<44, 1, u64> ftz; - BitField<39, 2, u64> tab5cb8_2; - BitField<38, 1, u64> ndv; - BitField<47, 1, u64> cc; - BitField<28, 8, 
u64> swizzle; - } fswzadd; - - union { - BitField<8, 8, Register> gpr; - BitField<20, 24, s64> offset; - } gmem; - - union { - BitField<20, 16, u64> imm20_16; - BitField<20, 19, u64> imm20_19; - BitField<20, 32, s64> imm20_32; - BitField<45, 1, u64> negate_b; - BitField<46, 1, u64> abs_a; - BitField<48, 1, u64> negate_a; - BitField<49, 1, u64> abs_b; - BitField<50, 1, u64> saturate_d; - BitField<56, 1, u64> negate_imm; - - union { - BitField<39, 3, u64> pred; - BitField<42, 1, u64> negate_pred; - } fmnmx; - - union { - BitField<39, 1, u64> invert_a; - BitField<40, 1, u64> invert_b; - BitField<41, 2, LogicOperation> operation; - BitField<44, 2, PredicateResultMode> pred_result_mode; - BitField<48, 3, Pred> pred48; - } lop; - - union { - BitField<53, 2, LogicOperation> operation; - BitField<55, 1, u64> invert_a; - BitField<56, 1, u64> invert_b; - } lop32i; - - union { - BitField<28, 8, u64> imm_lut28; - BitField<48, 8, u64> imm_lut48; - - [[nodiscard]] u32 GetImmLut28() const { - return static_cast<u32>(imm_lut28); - } - - [[nodiscard]] u32 GetImmLut48() const { - return static_cast<u32>(imm_lut48); - } - } lop3; - - [[nodiscard]] u16 GetImm20_16() const { - return static_cast<u16>(imm20_16); - } - - [[nodiscard]] u32 GetImm20_19() const { - u32 imm{static_cast<u32>(imm20_19)}; - imm <<= 12; - imm |= negate_imm ? 0x80000000 : 0; - return imm; - } - - [[nodiscard]] u32 GetImm20_32() const { - return static_cast<u32>(imm20_32); - } - - [[nodiscard]] s32 GetSignedImm20_20() const { - const auto immediate = static_cast<u32>(imm20_19 | (negate_imm << 19)); - // Sign extend the 20-bit value. - const auto mask = 1U << (20 - 1); - return static_cast<s32>((immediate ^ mask) - mask); - } - } alu; - - union { - BitField<38, 1, u64> idx; - BitField<51, 1, u64> saturate; - BitField<52, 2, IpaSampleMode> sample_mode; - BitField<54, 2, IpaInterpMode> interp_mode; - } ipa; - - union { - BitField<39, 2, u64> tab5cb8_2; - BitField<41, 3, u64> postfactor; - BitField<44, 2, u64> tab5c68_0; - BitField<48, 1, u64> negate_b; - } fmul; - - union { - BitField<55, 1, u64> saturate; - } fmul32; - - union { - BitField<52, 1, u64> generates_cc; - } op_32; - - union { - BitField<48, 1, u64> is_signed; - } shift; - - union { - BitField<39, 1, u64> wrap; - } shr; - - union { - BitField<37, 2, ShfType> type; - BitField<48, 2, ShfXmode> xmode; - BitField<50, 1, u64> wrap; - BitField<20, 6, u64> immediate; - } shf; - - union { - BitField<39, 5, u64> shift_amount; - BitField<48, 1, u64> negate_b; - BitField<49, 1, u64> negate_a; - } alu_integer; - - union { - BitField<43, 1, u64> x; - } iadd; - - union { - BitField<39, 1, u64> ftz; - BitField<32, 1, u64> saturate; - BitField<49, 2, HalfMerge> merge; - - BitField<44, 1, u64> abs_a; - BitField<47, 2, HalfType> type_a; - - BitField<30, 1, u64> abs_b; - BitField<28, 2, HalfType> type_b; - - BitField<35, 2, HalfType> type_c; - } alu_half; - - union { - BitField<39, 2, HalfPrecision> precision; - BitField<39, 1, u64> ftz; - BitField<52, 1, u64> saturate; - BitField<49, 2, HalfMerge> merge; - - BitField<43, 1, u64> negate_a; - BitField<44, 1, u64> abs_a; - BitField<47, 2, HalfType> type_a; - } alu_half_imm; - - union { - BitField<29, 1, u64> first_negate; - BitField<20, 9, u64> first; - - BitField<56, 1, u64> second_negate; - BitField<30, 9, u64> second; - - [[nodiscard]] u32 PackImmediates() const { - // Immediates are half floats shifted. 
- constexpr u32 imm_shift = 6; - return static_cast<u32>((first << imm_shift) | (second << (16 + imm_shift))); - } - } half_imm; - - union { - union { - BitField<37, 2, HalfPrecision> precision; - BitField<32, 1, u64> saturate; - - BitField<31, 1, u64> negate_b; - BitField<30, 1, u64> negate_c; - BitField<35, 2, HalfType> type_c; - } rr; - - BitField<57, 2, HalfPrecision> precision; - BitField<52, 1, u64> saturate; - - BitField<49, 2, HalfMerge> merge; - - BitField<47, 2, HalfType> type_a; - - BitField<56, 1, u64> negate_b; - BitField<28, 2, HalfType> type_b; - - BitField<51, 1, u64> negate_c; - BitField<53, 2, HalfType> type_reg39; - } hfma2; - - union { - BitField<40, 1, u64> invert; - } popc; - - union { - BitField<41, 1, u64> sh; - BitField<40, 1, u64> invert; - BitField<48, 1, u64> is_signed; - } flo; - - union { - BitField<39, 3, u64> pred; - BitField<42, 1, u64> neg_pred; - } sel; - - union { - BitField<39, 3, u64> pred; - BitField<42, 1, u64> negate_pred; - BitField<43, 2, IMinMaxExchange> exchange; - BitField<48, 1, u64> is_signed; - } imnmx; - - union { - BitField<31, 2, IAdd3Height> height_c; - BitField<33, 2, IAdd3Height> height_b; - BitField<35, 2, IAdd3Height> height_a; - BitField<37, 2, IAdd3Mode> mode; - BitField<49, 1, u64> neg_c; - BitField<50, 1, u64> neg_b; - BitField<51, 1, u64> neg_a; - } iadd3; - - union { - BitField<54, 1, u64> saturate; - BitField<56, 1, u64> negate_a; - } iadd32i; - - union { - BitField<53, 1, u64> negate_b; - BitField<54, 1, u64> abs_a; - BitField<56, 1, u64> negate_a; - BitField<57, 1, u64> abs_b; - } fadd32i; - - union { - BitField<40, 1, u64> brev; - BitField<47, 1, u64> rd_cc; - BitField<48, 1, u64> is_signed; - } bfe; - - union { - BitField<48, 3, u64> pred48; - - union { - BitField<20, 20, u64> entry_a; - BitField<39, 5, u64> entry_b; - BitField<45, 1, u64> neg; - BitField<46, 1, u64> uses_cc; - } imm; - - union { - BitField<20, 14, u64> cb_index; - BitField<34, 5, u64> cb_offset; - BitField<56, 1, u64> neg; - BitField<57, 1, u64> uses_cc; - } hi; - - union { - BitField<20, 14, u64> cb_index; - BitField<34, 5, u64> cb_offset; - BitField<39, 5, u64> entry_a; - BitField<45, 1, u64> neg; - BitField<46, 1, u64> uses_cc; - } rz; - - union { - BitField<39, 5, u64> entry_a; - BitField<45, 1, u64> neg; - BitField<46, 1, u64> uses_cc; - } r1; - - union { - BitField<28, 8, u64> entry_a; - BitField<37, 1, u64> neg; - BitField<38, 1, u64> uses_cc; - } r2; - - } lea; - - union { - BitField<0, 5, FlowCondition> cond; - } flow; - - union { - BitField<47, 1, u64> cc; - BitField<48, 1, u64> negate_b; - BitField<49, 1, u64> negate_c; - BitField<51, 2, u64> tab5980_1; - BitField<53, 2, u64> tab5980_0; - } ffma; - - union { - BitField<48, 3, UniformType> type; - BitField<44, 2, u64> unknown; - } ld_c; - - union { - BitField<48, 3, StoreType> type; - } ldst_sl; - - union { - BitField<44, 2, u64> unknown; - } ld_l; - - union { - BitField<44, 2, StoreCacheManagement> cache_management; - } st_l; - - union { - BitField<48, 3, UniformType> type; - BitField<46, 2, u64> cache_mode; - } ldg; - - union { - BitField<48, 3, UniformType> type; - BitField<46, 2, u64> cache_mode; - } stg; - - union { - BitField<23, 3, AtomicOp> operation; - BitField<48, 1, u64> extended; - BitField<20, 3, GlobalAtomicType> type; - } red; - - union { - BitField<52, 4, AtomicOp> operation; - BitField<49, 3, GlobalAtomicType> type; - BitField<28, 20, s64> offset; - } atom; - - union { - BitField<52, 4, AtomicOp> operation; - BitField<28, 2, AtomicType> type; - BitField<30, 22, s64> offset; - - 
[[nodiscard]] s32 GetImmediateOffset() const { - return static_cast<s32>(offset << 2); - } - } atoms; - - union { - BitField<32, 1, PhysicalAttributeDirection> direction; - BitField<47, 3, AttributeSize> size; - BitField<20, 11, u64> address; - } al2p; - - union { - BitField<53, 3, UniformType> type; - BitField<52, 1, u64> extended; - } generic; - - union { - BitField<0, 3, u64> pred0; - BitField<3, 3, u64> pred3; - BitField<6, 1, u64> neg_b; - BitField<7, 1, u64> abs_a; - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred; - BitField<43, 1, u64> neg_a; - BitField<44, 1, u64> abs_b; - BitField<45, 2, PredOperation> op; - BitField<47, 1, u64> ftz; - BitField<48, 4, PredCondition> cond; - } fsetp; - - union { - BitField<0, 3, u64> pred0; - BitField<3, 3, u64> pred3; - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred; - BitField<45, 2, PredOperation> op; - BitField<48, 1, u64> is_signed; - BitField<49, 3, PredCondition> cond; - } isetp; - - union { - BitField<48, 1, u64> is_signed; - BitField<49, 3, PredCondition> cond; - } icmp; - - union { - BitField<0, 3, u64> pred0; - BitField<3, 3, u64> pred3; - BitField<12, 3, u64> pred12; - BitField<15, 1, u64> neg_pred12; - BitField<24, 2, PredOperation> cond; - BitField<29, 3, u64> pred29; - BitField<32, 1, u64> neg_pred29; - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred39; - BitField<45, 2, PredOperation> op; - } psetp; - - union { - BitField<43, 4, PredCondition> cond; - BitField<45, 2, PredOperation> op; - BitField<3, 3, u64> pred3; - BitField<0, 3, u64> pred0; - BitField<39, 3, u64> pred39; - } vsetp; - - union { - BitField<12, 3, u64> pred12; - BitField<15, 1, u64> neg_pred12; - BitField<24, 2, PredOperation> cond; - BitField<29, 3, u64> pred29; - BitField<32, 1, u64> neg_pred29; - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred39; - BitField<44, 1, u64> bf; - BitField<45, 2, PredOperation> op; - } pset; - - union { - BitField<0, 3, u64> pred0; - BitField<3, 3, u64> pred3; - BitField<8, 5, ConditionCode> cc; // flag in cc - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred39; - BitField<45, 4, PredOperation> op; // op with pred39 - } csetp; - - union { - BitField<6, 1, u64> ftz; - BitField<45, 2, PredOperation> op; - BitField<3, 3, u64> pred3; - BitField<0, 3, u64> pred0; - BitField<43, 1, u64> negate_a; - BitField<44, 1, u64> abs_a; - BitField<47, 2, HalfType> type_a; - union { - BitField<35, 4, PredCondition> cond; - BitField<49, 1, u64> h_and; - BitField<31, 1, u64> negate_b; - BitField<30, 1, u64> abs_b; - BitField<28, 2, HalfType> type_b; - } reg; - union { - BitField<56, 1, u64> negate_b; - BitField<54, 1, u64> abs_b; - } cbuf; - union { - BitField<49, 4, PredCondition> cond; - BitField<53, 1, u64> h_and; - } cbuf_and_imm; - BitField<42, 1, u64> neg_pred; - BitField<39, 3, u64> pred39; - } hsetp2; - - union { - BitField<40, 1, R2pMode> mode; - BitField<41, 2, u64> byte; - BitField<20, 7, u64> immediate_mask; - } p2r_r2p; - - union { - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred; - BitField<43, 1, u64> neg_a; - BitField<44, 1, u64> abs_b; - BitField<45, 2, PredOperation> op; - BitField<48, 4, PredCondition> cond; - BitField<52, 1, u64> bf; - BitField<53, 1, u64> neg_b; - BitField<54, 1, u64> abs_a; - BitField<55, 1, u64> ftz; - } fset; - - union { - BitField<47, 1, u64> ftz; - BitField<48, 4, PredCondition> cond; - } fcmp; - - union { - BitField<49, 1, u64> bf; - BitField<35, 3, PredCondition> cond; - BitField<50, 1, u64> ftz; - BitField<45, 2, PredOperation> op; 
- BitField<43, 1, u64> negate_a; - BitField<44, 1, u64> abs_a; - BitField<47, 2, HalfType> type_a; - BitField<31, 1, u64> negate_b; - BitField<30, 1, u64> abs_b; - BitField<28, 2, HalfType> type_b; - BitField<42, 1, u64> neg_pred; - BitField<39, 3, u64> pred39; - } hset2; - - union { - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred; - BitField<44, 1, u64> bf; - BitField<45, 2, PredOperation> op; - BitField<48, 1, u64> is_signed; - BitField<49, 3, PredCondition> cond; - } iset; - - union { - BitField<45, 1, u64> negate_a; - BitField<49, 1, u64> abs_a; - BitField<10, 2, Register::Size> src_size; - BitField<13, 1, u64> is_input_signed; - BitField<8, 2, Register::Size> dst_size; - BitField<12, 1, u64> is_output_signed; - - union { - BitField<39, 2, u64> tab5cb8_2; - } i2f; - - union { - BitField<39, 2, F2iRoundingOp> rounding; - } f2i; - - union { - BitField<39, 4, u64> rounding; - // H0, H1 extract for F16 missing - BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value - [[nodiscard]] F2fRoundingOp GetRoundingMode() const { - constexpr u64 rounding_mask = 0x0B; - return static_cast<F2fRoundingOp>(rounding.Value() & rounding_mask); - } - } f2f; - - union { - BitField<41, 2, u64> selector; - } int_src; - - union { - BitField<41, 1, u64> selector; - } float_src; - } conversion; - - union { - BitField<28, 1, u64> array; - BitField<29, 2, TextureType> texture_type; - BitField<31, 4, u64> component_mask; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<54, 1, u64> aoffi_flag; - BitField<55, 3, TextureProcessMode> process_mode; - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - return ((1ULL << component) & component_mask) != 0; - } - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - return process_mode; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::AOFFI: - return aoffi_flag != 0; - default: - break; - } - return false; - } - } tex; - - union { - BitField<28, 1, u64> array; - BitField<29, 2, TextureType> texture_type; - BitField<31, 4, u64> component_mask; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<36, 1, u64> aoffi_flag; - BitField<37, 3, TextureProcessMode> process_mode; - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - return ((1ULL << component) & component_mask) != 0; - } - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - return process_mode; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::AOFFI: - return aoffi_flag != 0; - default: - break; - } - return false; - } - } tex_b; - - union { - BitField<22, 6, TextureQueryType> query_type; - BitField<31, 4, u64> component_mask; - BitField<49, 1, u64> nodep_flag; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::NODEP: - return nodep_flag != 0; - default: - break; - } - return false; - } - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - return ((1ULL << component) & component_mask) != 0; - } - } txq; - - union { - BitField<28, 1, u64> array; - BitField<29, 2, TextureType> texture_type; - BitField<31, 4, u64> component_mask; - 
BitField<35, 1, u64> ndv_flag; - BitField<49, 1, u64> nodep_flag; - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - return ((1ULL << component) & component_mask) != 0; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::NDV: - return (ndv_flag != 0); - case TextureMiscMode::NODEP: - return (nodep_flag != 0); - default: - break; - } - return false; - } - } tmml; - - union { - BitField<28, 1, u64> array; - BitField<29, 2, TextureType> texture_type; - BitField<35, 1, u64> ndv_flag; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<54, 2, u64> offset_mode; - BitField<56, 2, u64> component; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::NDV: - return ndv_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::AOFFI: - return offset_mode == 1; - case TextureMiscMode::PTP: - return offset_mode == 2; - default: - break; - } - return false; - } - } tld4; - - union { - BitField<35, 1, u64> ndv_flag; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<33, 2, u64> offset_mode; - BitField<37, 2, u64> component; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::NDV: - return ndv_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::AOFFI: - return offset_mode == 1; - case TextureMiscMode::PTP: - return offset_mode == 2; - default: - break; - } - return false; - } - } tld4_b; - - union { - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<51, 1, u64> aoffi_flag; - BitField<52, 2, u64> component; - BitField<55, 1, u64> fp16_flag; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::AOFFI: - return aoffi_flag != 0; - default: - break; - } - return false; - } - } tld4s; - - union { - BitField<0, 8, Register> gpr0; - BitField<28, 8, Register> gpr28; - BitField<49, 1, u64> nodep_flag; - BitField<50, 3, u64> component_mask_selector; - BitField<53, 4, u64> texture_info; - BitField<59, 1, u64> fp32_flag; - - [[nodiscard]] TextureType GetTextureType() const { - // The TEXS instruction has a weird encoding for the texture type. 
- if (texture_info == 0) { - return TextureType::Texture1D; - } - if (texture_info >= 1 && texture_info <= 9) { - return TextureType::Texture2D; - } - if (texture_info >= 10 && texture_info <= 11) { - return TextureType::Texture3D; - } - if (texture_info >= 12 && texture_info <= 13) { - return TextureType::TextureCube; - } - - LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value()); - UNREACHABLE(); - return TextureType::Texture1D; - } - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - switch (texture_info) { - case 0: - case 2: - case 6: - case 8: - case 9: - case 11: - return TextureProcessMode::LZ; - case 3: - case 5: - case 13: - return TextureProcessMode::LL; - default: - break; - } - return TextureProcessMode::None; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::DC: - return (texture_info >= 4 && texture_info <= 6) || texture_info == 9; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - default: - break; - } - return false; - } - - [[nodiscard]] bool IsArrayTexture() const { - // TEXS only supports Texture2D arrays. - return texture_info >= 7 && texture_info <= 9; - } - - [[nodiscard]] bool HasTwoDestinations() const { - return gpr28.Value() != Register::ZeroIndex; - } - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - static constexpr std::array<std::array<u32, 8>, 4> mask_lut{{ - {}, - {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc}, - {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc}, - {0x7, 0xb, 0xd, 0xe, 0xf}, - }}; - - std::size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U}; - index |= gpr28.Value() != Register::ZeroIndex ? 2 : 0; - - u32 mask = mask_lut[index][component_mask_selector]; - // A mask of 0 means this instruction uses an unimplemented mask. - ASSERT(mask != 0); - return ((1ull << component) & mask) != 0; - } - } texs; - - union { - BitField<28, 1, u64> is_array; - BitField<29, 2, TextureType> texture_type; - BitField<35, 1, u64> aoffi; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> ms; // Multisample? - BitField<54, 1, u64> cl; - BitField<55, 1, u64> process_mode; - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - return process_mode == 0 ? TextureProcessMode::LZ : TextureProcessMode::LL; - } - } tld; - - union { - BitField<49, 1, u64> nodep_flag; - BitField<53, 4, u64> texture_info; - BitField<59, 1, u64> fp32_flag; - - [[nodiscard]] TextureType GetTextureType() const { - // The TLDS instruction has a weird encoding for the texture type. 
- if (texture_info <= 1) { - return TextureType::Texture1D; - } - if (texture_info == 2 || texture_info == 8 || texture_info == 12 || - (texture_info >= 4 && texture_info <= 6)) { - return TextureType::Texture2D; - } - if (texture_info == 7) { - return TextureType::Texture3D; - } - - LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value()); - UNREACHABLE(); - return TextureType::Texture1D; - } - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - if (texture_info == 1 || texture_info == 5 || texture_info == 12) { - return TextureProcessMode::LL; - } - return TextureProcessMode::LZ; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::AOFFI: - return texture_info == 12 || texture_info == 4; - case TextureMiscMode::MZ: - return texture_info == 5; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - default: - break; - } - return false; - } - - [[nodiscard]] bool IsArrayTexture() const { - // TEXS only supports Texture2D arrays. - return texture_info == 8; - } - } tlds; - - union { - BitField<28, 1, u64> is_array; - BitField<29, 2, TextureType> texture_type; - BitField<35, 1, u64> aoffi_flag; - BitField<49, 1, u64> nodep_flag; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::AOFFI: - return aoffi_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - default: - break; - } - return false; - } - - } txd; - - union { - BitField<24, 2, StoreCacheManagement> cache_management; - BitField<33, 3, ImageType> image_type; - BitField<49, 2, OutOfBoundsStore> out_of_bounds_store; - BitField<51, 1, u64> is_immediate; - BitField<52, 1, SurfaceDataMode> mode; - - BitField<20, 3, StoreType> store_data_layout; - BitField<20, 4, u64> component_mask_selector; - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - ASSERT(mode == SurfaceDataMode::P); - constexpr u8 R = 0b0001; - constexpr u8 G = 0b0010; - constexpr u8 B = 0b0100; - constexpr u8 A = 0b1000; - constexpr std::array<u8, 16> mask = { - 0, (R), (G), (R | G), (B), (R | B), - (G | B), (R | G | B), (A), (R | A), (G | A), (R | G | A), - (B | A), (R | B | A), (G | B | A), (R | G | B | A)}; - return std::bitset<4>{mask.at(component_mask_selector)}.test(component); - } - - [[nodiscard]] StoreType GetStoreDataLayout() const { - ASSERT(mode == SurfaceDataMode::D_BA); - return store_data_layout; - } - } suldst; - - union { - BitField<28, 1, u64> is_ba; - BitField<51, 3, ImageAtomicOperationType> operation_type; - BitField<33, 3, ImageType> image_type; - BitField<29, 4, ImageAtomicOperation> operation; - BitField<49, 2, OutOfBoundsStore> out_of_bounds_store; - } suatom_d; - - union { - BitField<20, 24, u64> target; - BitField<5, 1, u64> constant_buffer; - - [[nodiscard]] s32 GetBranchTarget() const { - // Sign extend the branch target offset - const auto mask = 1U << (24 - 1); - const auto target_value = static_cast<u32>(target); - constexpr auto instruction_size = static_cast<s32>(sizeof(Instruction)); - - // The branch offset is relative to the next instruction and is stored in bytes, so - // divide it by the size of an instruction and add 1 to it. 
- return static_cast<s32>((target_value ^ mask) - mask) / instruction_size + 1; - } - } bra; - - union { - BitField<20, 24, u64> target; - BitField<5, 1, u64> constant_buffer; - - [[nodiscard]] s32 GetBranchExtend() const { - // Sign extend the branch target offset - const auto mask = 1U << (24 - 1); - const auto target_value = static_cast<u32>(target); - constexpr auto instruction_size = static_cast<s32>(sizeof(Instruction)); - - // The branch offset is relative to the next instruction and is stored in bytes, so - // divide it by the size of an instruction and add 1 to it. - return static_cast<s32>((target_value ^ mask) - mask) / instruction_size + 1; - } - } brx; - - union { - BitField<39, 1, u64> emit; // EmitVertex - BitField<40, 1, u64> cut; // EndPrimitive - } out; - - union { - BitField<31, 1, u64> skew; - BitField<32, 1, u64> o; - BitField<33, 2, IsberdMode> mode; - BitField<47, 2, IsberdShift> shift; - } isberd; - - union { - BitField<8, 2, MembarType> type; - BitField<0, 2, MembarUnknown> unknown; - } membar; - - union { - BitField<48, 1, u64> signed_a; - BitField<38, 1, u64> is_byte_chunk_a; - BitField<36, 2, VideoType> type_a; - BitField<36, 2, u64> byte_height_a; - - BitField<49, 1, u64> signed_b; - BitField<50, 1, u64> use_register_b; - BitField<30, 1, u64> is_byte_chunk_b; - BitField<28, 2, VideoType> type_b; - BitField<28, 2, u64> byte_height_b; - } video; - - union { - BitField<51, 2, VmadShr> shr; - BitField<55, 1, u64> saturate; // Saturates the result (a * b + c) - BitField<47, 1, u64> cc; - } vmad; - - union { - BitField<54, 1, u64> is_dest_signed; - BitField<48, 1, u64> is_src_a_signed; - BitField<49, 1, u64> is_src_b_signed; - BitField<37, 2, u64> src_format_a; - BitField<29, 2, u64> src_format_b; - BitField<56, 1, u64> mx; - BitField<55, 1, u64> sat; - BitField<36, 2, u64> selector_a; - BitField<28, 2, u64> selector_b; - BitField<50, 1, u64> is_op_b_register; - BitField<51, 3, VmnmxOperation> operation; - - [[nodiscard]] VmnmxType SourceFormatA() const { - switch (src_format_a) { - case 0b11: - return VmnmxType::Bits32; - case 0b10: - return VmnmxType::Bits16; - default: - return VmnmxType::Bits8; - } - } - - [[nodiscard]] VmnmxType SourceFormatB() const { - switch (src_format_b) { - case 0b11: - return VmnmxType::Bits32; - case 0b10: - return VmnmxType::Bits16; - default: - return VmnmxType::Bits8; - } - } - } vmnmx; - - union { - BitField<20, 16, u64> imm20_16; - BitField<35, 1, u64> high_b_rr; // used on RR - BitField<36, 1, u64> product_shift_left; - BitField<37, 1, u64> merge_37; - BitField<48, 1, u64> sign_a; - BitField<49, 1, u64> sign_b; - BitField<50, 2, XmadMode> mode_cbf; // used by CR, RC - BitField<50, 3, XmadMode> mode; - BitField<52, 1, u64> high_b; - BitField<53, 1, u64> high_a; - BitField<55, 1, u64> product_shift_left_second; // used on CR - BitField<56, 1, u64> merge_56; - } xmad; - - union { - BitField<20, 14, u64> shifted_offset; - BitField<34, 5, u64> index; - - [[nodiscard]] u64 GetOffset() const { - return shifted_offset * 4; - } - } cbuf34; - - union { - BitField<20, 16, s64> offset; - BitField<36, 5, u64> index; - - [[nodiscard]] s64 GetOffset() const { - return offset; - } - } cbuf36; - - // Unsure about the size of this one. - // It's always used with a gpr0, so any size should be fine. 
- BitField<20, 8, SystemVariable> sys20; - - BitField<47, 1, u64> generates_cc; - BitField<61, 1, u64> is_b_imm; - BitField<60, 1, u64> is_b_gpr; - BitField<59, 1, u64> is_c_gpr; - BitField<20, 24, s64> smem_imm; - BitField<0, 5, ConditionCode> flow_condition_code; - - Attribute attribute; - Sampler sampler; - Image image; - - u64 value; -}; -static_assert(sizeof(Instruction) == 0x8, "Incorrect structure size"); -static_assert(std::is_standard_layout_v<Instruction>, "Instruction is not standard layout"); - -class OpCode { -public: - enum class Id { - KIL, - SSY, - SYNC, - BRK, - DEPBAR, - VOTE, - VOTE_VTG, - SHFL, - FSWZADD, - BFE_C, - BFE_R, - BFE_IMM, - BFI_RC, - BFI_IMM_R, - BRA, - BRX, - PBK, - LD_A, - LD_L, - LD_S, - LD_C, - LD, // Load from generic memory - LDG, // Load from global memory - ST_A, - ST_L, - ST_S, - ST, // Store in generic memory - STG, // Store in global memory - RED, // Reduction operation - ATOM, // Atomic operation on global memory - ATOMS, // Atomic operation on shared memory - AL2P, // Transforms attribute memory into physical memory - TEX, - TEX_B, // Texture Load Bindless - TXQ, // Texture Query - TXQ_B, // Texture Query Bindless - TEXS, // Texture Fetch with scalar/non-vec4 source/destinations - TLD, // Texture Load - TLDS, // Texture Load with scalar/non-vec4 source/destinations - TLD4, // Texture Gather 4 - TLD4_B, // Texture Gather 4 Bindless - TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations - TMML_B, // Texture Mip Map Level - TMML, // Texture Mip Map Level - TXD, // Texture Gradient/Load with Derivates - TXD_B, // Texture Gradient/Load with Derivates Bindless - SUST, // Surface Store - SULD, // Surface Load - SUATOM, // Surface Atomic Operation - EXIT, - NOP, - IPA, - OUT_R, // Emit vertex/primitive - ISBERD, - BAR, - MEMBAR, - VMAD, - VSETP, - VMNMX, - FFMA_IMM, // Fused Multiply and Add - FFMA_CR, - FFMA_RC, - FFMA_RR, - FADD_C, - FADD_R, - FADD_IMM, - FADD32I, - FMUL_C, - FMUL_R, - FMUL_IMM, - FMUL32_IMM, - IADD_C, - IADD_R, - IADD_IMM, - IADD3_C, // Add 3 Integers - IADD3_R, - IADD3_IMM, - IADD32I, - ISCADD_C, // Scale and Add - ISCADD_R, - ISCADD_IMM, - FLO_R, - FLO_C, - FLO_IMM, - LEA_R1, - LEA_R2, - LEA_RZ, - LEA_IMM, - LEA_HI, - HADD2_C, - HADD2_R, - HADD2_IMM, - HMUL2_C, - HMUL2_R, - HMUL2_IMM, - HFMA2_CR, - HFMA2_RC, - HFMA2_RR, - HFMA2_IMM_R, - HSETP2_C, - HSETP2_R, - HSETP2_IMM, - HSET2_C, - HSET2_R, - HSET2_IMM, - POPC_C, - POPC_R, - POPC_IMM, - SEL_C, - SEL_R, - SEL_IMM, - ICMP_RC, - ICMP_R, - ICMP_CR, - ICMP_IMM, - FCMP_RR, - FCMP_RC, - FCMP_IMMR, - MUFU, // Multi-Function Operator - RRO_C, // Range Reduction Operator - RRO_R, - RRO_IMM, - F2F_C, - F2F_R, - F2F_IMM, - F2I_C, - F2I_R, - F2I_IMM, - I2F_C, - I2F_R, - I2F_IMM, - I2I_C, - I2I_R, - I2I_IMM, - LOP_C, - LOP_R, - LOP_IMM, - LOP32I, - LOP3_C, - LOP3_R, - LOP3_IMM, - MOV_C, - MOV_R, - MOV_IMM, - S2R, - MOV32_IMM, - SHL_C, - SHL_R, - SHL_IMM, - SHR_C, - SHR_R, - SHR_IMM, - SHF_RIGHT_R, - SHF_RIGHT_IMM, - SHF_LEFT_R, - SHF_LEFT_IMM, - FMNMX_C, - FMNMX_R, - FMNMX_IMM, - IMNMX_C, - IMNMX_R, - IMNMX_IMM, - FSETP_C, // Set Predicate - FSETP_R, - FSETP_IMM, - FSET_C, - FSET_R, - FSET_IMM, - ISETP_C, - ISETP_IMM, - ISETP_R, - ISET_R, - ISET_C, - ISET_IMM, - PSETP, - PSET, - CSETP, - R2P_IMM, - P2R_IMM, - XMAD_IMM, - XMAD_CR, - XMAD_RC, - XMAD_RR, - }; - - enum class Type { - Trivial, - Arithmetic, - ArithmeticImmediate, - ArithmeticInteger, - ArithmeticIntegerImmediate, - ArithmeticHalf, - ArithmeticHalfImmediate, - Bfe, - Bfi, - Shift, - Ffma, - Hfma2, - Flow, - 
Synch, - Warp, - Memory, - Texture, - Image, - FloatSet, - FloatSetPredicate, - IntegerSet, - IntegerSetPredicate, - HalfSet, - HalfSetPredicate, - PredicateSetPredicate, - PredicateSetRegister, - RegisterSetPredicate, - Conversion, - Video, - Xmad, - Unknown, - }; - - /// Returns whether an opcode has an execution predicate field or not (ie, whether it can be - /// conditionally executed). - [[nodiscard]] static bool IsPredicatedInstruction(Id opcode) { - // TODO(Subv): Add the rest of unpredicated instructions. - return opcode != Id::SSY && opcode != Id::PBK; - } - - class Matcher { - public: - constexpr Matcher(const char* const name_, u16 mask_, u16 expected_, Id id_, Type type_) - : name{name_}, mask{mask_}, expected{expected_}, id{id_}, type{type_} {} - - [[nodiscard]] constexpr const char* GetName() const { - return name; - } - - [[nodiscard]] constexpr u16 GetMask() const { - return mask; - } - - [[nodiscard]] constexpr Id GetId() const { - return id; - } - - [[nodiscard]] constexpr Type GetType() const { - return type; - } - - /** - * Tests to see if the given instruction is the instruction this matcher represents. - * @param instruction The instruction to test - * @returns true if the given instruction matches. - */ - [[nodiscard]] constexpr bool Matches(u16 instruction) const { - return (instruction & mask) == expected; - } - - private: - const char* name; - u16 mask; - u16 expected; - Id id; - Type type; - }; - - using DecodeResult = std::optional<std::reference_wrapper<const Matcher>>; - [[nodiscard]] static DecodeResult Decode(Instruction instr) { - static const auto table{GetDecodeTable()}; - - const auto matches_instruction = [instr](const auto& matcher) { - return matcher.Matches(static_cast<u16>(instr.opcode)); - }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); - return iter != table.end() ? std::optional<std::reference_wrapper<const Matcher>>(*iter) - : std::nullopt; - } - -private: - struct Detail { - private: - static constexpr std::size_t opcode_bitsize = 16; - - /** - * Generates the mask and the expected value after masking from a given bitstring. - * A '0' in a bitstring indicates that a zero must be present at that bit position. - * A '1' in a bitstring indicates that a one must be present at that bit position. - */ - [[nodiscard]] static constexpr auto GetMaskAndExpect(const char* const bitstring) { - u16 mask = 0, expect = 0; - for (std::size_t i = 0; i < opcode_bitsize; i++) { - const std::size_t bit_position = opcode_bitsize - i - 1; - switch (bitstring[i]) { - case '0': - mask |= static_cast<u16>(1U << bit_position); - break; - case '1': - expect |= static_cast<u16>(1U << bit_position); - mask |= static_cast<u16>(1U << bit_position); - break; - default: - // Ignore - break; - } - } - return std::make_pair(mask, expect); - } - - public: - /// Creates a matcher that can match and parse instructions based on bitstring. 
- [[nodiscard]] static constexpr auto GetMatcher(const char* const bitstring, Id op, - Type type, const char* const name) { - const auto [mask, expected] = GetMaskAndExpect(bitstring); - return Matcher(name, mask, expected, op, type); - } - }; - - [[nodiscard]] static std::vector<Matcher> GetDecodeTable() { - std::vector<Matcher> table = { -#define INST(bitstring, op, type, name) Detail::GetMatcher(bitstring, op, type, name) - INST("111000110011----", Id::KIL, Type::Flow, "KIL"), - INST("111000101001----", Id::SSY, Type::Flow, "SSY"), - INST("111000101010----", Id::PBK, Type::Flow, "PBK"), - INST("111000100100----", Id::BRA, Type::Flow, "BRA"), - INST("111000100101----", Id::BRX, Type::Flow, "BRX"), - INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"), - INST("111000110100----", Id::BRK, Type::Flow, "BRK"), - INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), - INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), - INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), - INST("0101000011100---", Id::VOTE_VTG, Type::Warp, "VOTE_VTG"), - INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"), - INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"), - INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), - INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), - INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), - INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"), - INST("100-------------", Id::LD, Type::Memory, "LD"), - INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), - INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), - INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"), - INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), - INST("101-------------", Id::ST, Type::Memory, "ST"), - INST("1110111011011---", Id::STG, Type::Memory, "STG"), - INST("1110101111111---", Id::RED, Type::Memory, "RED"), - INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"), - INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"), - INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), - INST("110000----111---", Id::TEX, Type::Texture, "TEX"), - INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"), - INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"), - INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"), - INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), - INST("11011100--11----", Id::TLD, Type::Texture, "TLD"), - INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"), - INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), - INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"), - INST("11011111-0------", Id::TLD4S, Type::Texture, "TLD4S"), - INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), - INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), - INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"), - INST("11011110001110--", Id::TXD, Type::Texture, "TXD"), - INST("11101011001-----", Id::SUST, Type::Image, "SUST"), - INST("11101011000-----", Id::SULD, Type::Image, "SULD"), - INST("1110101000------", Id::SUATOM, Type::Image, "SUATOM_D"), - INST("0101000010110---", Id::NOP, Type::Trivial, "NOP"), - INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), - INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), - INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), - INST("1111000010101---", Id::BAR, Type::Trivial, "BAR"), - INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"), - INST("01011111--------", Id::VMAD, Type::Video, "VMAD"), - 
INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"), - INST("0011101---------", Id::VMNMX, Type::Video, "VMNMX"), - INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), - INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"), - INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"), - INST("010110011-------", Id::FFMA_RR, Type::Ffma, "FFMA_RR"), - INST("0100110001011---", Id::FADD_C, Type::Arithmetic, "FADD_C"), - INST("0101110001011---", Id::FADD_R, Type::Arithmetic, "FADD_R"), - INST("0011100-01011---", Id::FADD_IMM, Type::Arithmetic, "FADD_IMM"), - INST("000010----------", Id::FADD32I, Type::ArithmeticImmediate, "FADD32I"), - INST("0100110001101---", Id::FMUL_C, Type::Arithmetic, "FMUL_C"), - INST("0101110001101---", Id::FMUL_R, Type::Arithmetic, "FMUL_R"), - INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"), - INST("00011110--------", Id::FMUL32_IMM, Type::ArithmeticImmediate, "FMUL32_IMM"), - INST("0100110000010---", Id::IADD_C, Type::ArithmeticInteger, "IADD_C"), - INST("0101110000010---", Id::IADD_R, Type::ArithmeticInteger, "IADD_R"), - INST("0011100-00010---", Id::IADD_IMM, Type::ArithmeticInteger, "IADD_IMM"), - INST("010011001100----", Id::IADD3_C, Type::ArithmeticInteger, "IADD3_C"), - INST("010111001100----", Id::IADD3_R, Type::ArithmeticInteger, "IADD3_R"), - INST("0011100-1100----", Id::IADD3_IMM, Type::ArithmeticInteger, "IADD3_IMM"), - INST("0001110---------", Id::IADD32I, Type::ArithmeticIntegerImmediate, "IADD32I"), - INST("0100110000011---", Id::ISCADD_C, Type::ArithmeticInteger, "ISCADD_C"), - INST("0101110000011---", Id::ISCADD_R, Type::ArithmeticInteger, "ISCADD_R"), - INST("0011100-00011---", Id::ISCADD_IMM, Type::ArithmeticInteger, "ISCADD_IMM"), - INST("0100110000001---", Id::POPC_C, Type::ArithmeticInteger, "POPC_C"), - INST("0101110000001---", Id::POPC_R, Type::ArithmeticInteger, "POPC_R"), - INST("0011100-00001---", Id::POPC_IMM, Type::ArithmeticInteger, "POPC_IMM"), - INST("0100110010100---", Id::SEL_C, Type::ArithmeticInteger, "SEL_C"), - INST("0101110010100---", Id::SEL_R, Type::ArithmeticInteger, "SEL_R"), - INST("0011100-10100---", Id::SEL_IMM, Type::ArithmeticInteger, "SEL_IMM"), - INST("010100110100----", Id::ICMP_RC, Type::ArithmeticInteger, "ICMP_RC"), - INST("010110110100----", Id::ICMP_R, Type::ArithmeticInteger, "ICMP_R"), - INST("010010110100----", Id::ICMP_CR, Type::ArithmeticInteger, "ICMP_CR"), - INST("0011011-0100----", Id::ICMP_IMM, Type::ArithmeticInteger, "ICMP_IMM"), - INST("0101110000110---", Id::FLO_R, Type::ArithmeticInteger, "FLO_R"), - INST("0100110000110---", Id::FLO_C, Type::ArithmeticInteger, "FLO_C"), - INST("0011100-00110---", Id::FLO_IMM, Type::ArithmeticInteger, "FLO_IMM"), - INST("0101101111011---", Id::LEA_R2, Type::ArithmeticInteger, "LEA_R2"), - INST("0101101111010---", Id::LEA_R1, Type::ArithmeticInteger, "LEA_R1"), - INST("001101101101----", Id::LEA_IMM, Type::ArithmeticInteger, "LEA_IMM"), - INST("010010111101----", Id::LEA_RZ, Type::ArithmeticInteger, "LEA_RZ"), - INST("00011000--------", Id::LEA_HI, Type::ArithmeticInteger, "LEA_HI"), - INST("0111101-1-------", Id::HADD2_C, Type::ArithmeticHalf, "HADD2_C"), - INST("0101110100010---", Id::HADD2_R, Type::ArithmeticHalf, "HADD2_R"), - INST("0111101-0-------", Id::HADD2_IMM, Type::ArithmeticHalfImmediate, "HADD2_IMM"), - INST("0111100-1-------", Id::HMUL2_C, Type::ArithmeticHalf, "HMUL2_C"), - INST("0101110100001---", Id::HMUL2_R, Type::ArithmeticHalf, "HMUL2_R"), - INST("0111100-0-------", Id::HMUL2_IMM, 
Type::ArithmeticHalfImmediate, "HMUL2_IMM"), - INST("01110---1-------", Id::HFMA2_CR, Type::Hfma2, "HFMA2_CR"), - INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"), - INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"), - INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"), - INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"), - INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"), - INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"), - INST("0111110-1-------", Id::HSET2_C, Type::HalfSet, "HSET2_C"), - INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), - INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"), - INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"), - INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"), - INST("0011011-1010----", Id::FCMP_IMMR, Type::Arithmetic, "FCMP_IMMR"), - INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), - INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), - INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"), - INST("0011100-10010---", Id::RRO_IMM, Type::Arithmetic, "RRO_IMM"), - INST("0100110010101---", Id::F2F_C, Type::Conversion, "F2F_C"), - INST("0101110010101---", Id::F2F_R, Type::Conversion, "F2F_R"), - INST("0011100-10101---", Id::F2F_IMM, Type::Conversion, "F2F_IMM"), - INST("0100110010110---", Id::F2I_C, Type::Conversion, "F2I_C"), - INST("0101110010110---", Id::F2I_R, Type::Conversion, "F2I_R"), - INST("0011100-10110---", Id::F2I_IMM, Type::Conversion, "F2I_IMM"), - INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"), - INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"), - INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"), - INST("1111000011001---", Id::S2R, Type::Trivial, "S2R"), - INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"), - INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"), - INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"), - INST("0011100-01100---", Id::FMNMX_IMM, Type::Arithmetic, "FMNMX_IMM"), - INST("0100110000100---", Id::IMNMX_C, Type::ArithmeticInteger, "IMNMX_C"), - INST("0101110000100---", Id::IMNMX_R, Type::ArithmeticInteger, "IMNMX_R"), - INST("0011100-00100---", Id::IMNMX_IMM, Type::ArithmeticInteger, "IMNMX_IMM"), - INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"), - INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"), - INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"), - INST("0101001111110---", Id::BFI_RC, Type::Bfi, "BFI_RC"), - INST("0011011-11110---", Id::BFI_IMM_R, Type::Bfi, "BFI_IMM_R"), - INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"), - INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"), - INST("0011100-01000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"), - INST("000001----------", Id::LOP32I, Type::ArithmeticIntegerImmediate, "LOP32I"), - INST("0000001---------", Id::LOP3_C, Type::ArithmeticInteger, "LOP3_C"), - INST("0101101111100---", Id::LOP3_R, Type::ArithmeticInteger, "LOP3_R"), - INST("0011110---------", Id::LOP3_IMM, Type::ArithmeticInteger, "LOP3_IMM"), - INST("0100110001001---", Id::SHL_C, Type::Shift, "SHL_C"), - INST("0101110001001---", Id::SHL_R, Type::Shift, "SHL_R"), - INST("0011100-01001---", Id::SHL_IMM, Type::Shift, "SHL_IMM"), - INST("0100110000101---", Id::SHR_C, Type::Shift, "SHR_C"), - 
INST("0101110000101---", Id::SHR_R, Type::Shift, "SHR_R"), - INST("0011100-00101---", Id::SHR_IMM, Type::Shift, "SHR_IMM"), - INST("0101110011111---", Id::SHF_RIGHT_R, Type::Shift, "SHF_RIGHT_R"), - INST("0011100-11111---", Id::SHF_RIGHT_IMM, Type::Shift, "SHF_RIGHT_IMM"), - INST("0101101111111---", Id::SHF_LEFT_R, Type::Shift, "SHF_LEFT_R"), - INST("0011011-11111---", Id::SHF_LEFT_IMM, Type::Shift, "SHF_LEFT_IMM"), - INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"), - INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"), - INST("0011100-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"), - INST("0100110010111---", Id::I2F_C, Type::Conversion, "I2F_C"), - INST("0101110010111---", Id::I2F_R, Type::Conversion, "I2F_R"), - INST("0011100-10111---", Id::I2F_IMM, Type::Conversion, "I2F_IMM"), - INST("01011000--------", Id::FSET_R, Type::FloatSet, "FSET_R"), - INST("0100100---------", Id::FSET_C, Type::FloatSet, "FSET_C"), - INST("0011000---------", Id::FSET_IMM, Type::FloatSet, "FSET_IMM"), - INST("010010111011----", Id::FSETP_C, Type::FloatSetPredicate, "FSETP_C"), - INST("010110111011----", Id::FSETP_R, Type::FloatSetPredicate, "FSETP_R"), - INST("0011011-1011----", Id::FSETP_IMM, Type::FloatSetPredicate, "FSETP_IMM"), - INST("010010110110----", Id::ISETP_C, Type::IntegerSetPredicate, "ISETP_C"), - INST("010110110110----", Id::ISETP_R, Type::IntegerSetPredicate, "ISETP_R"), - INST("0011011-0110----", Id::ISETP_IMM, Type::IntegerSetPredicate, "ISETP_IMM"), - INST("010110110101----", Id::ISET_R, Type::IntegerSet, "ISET_R"), - INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"), - INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"), - INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"), - INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), - INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"), - INST("0011100-11110---", Id::R2P_IMM, Type::RegisterSetPredicate, "R2P_IMM"), - INST("0011100-11101---", Id::P2R_IMM, Type::RegisterSetPredicate, "P2R_IMM"), - INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"), - INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"), - INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"), - INST("0101101100------", Id::XMAD_RR, Type::Xmad, "XMAD_RR"), - }; -#undef INST - std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) { - // If a matcher has more bits in its mask it is more specific, so it - // should come first. - return std::bitset<16>(a.GetMask()).count() > std::bitset<16>(b.GetMask()).count(); - }); - - return table; - } -}; - -} // namespace Tegra::Shader diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h deleted file mode 100644 index e0d7b89c5..000000000 --- a/src/video_core/engines/shader_header.h +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include <array> -#include <optional> - -#include "common/bit_field.h" -#include "common/common_funcs.h" -#include "common/common_types.h" - -namespace Tegra::Shader { - -enum class OutputTopology : u32 { - PointList = 1, - LineStrip = 6, - TriangleStrip = 7, -}; - -enum class PixelImap : u8 { - Unused = 0, - Constant = 1, - Perspective = 2, - ScreenLinear = 3, -}; - -// Documentation in: -// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html -struct Header { - union { - BitField<0, 5, u32> sph_type; - BitField<5, 5, u32> version; - BitField<10, 4, u32> shader_type; - BitField<14, 1, u32> mrt_enable; - BitField<15, 1, u32> kills_pixels; - BitField<16, 1, u32> does_global_store; - BitField<17, 4, u32> sass_version; - BitField<21, 5, u32> reserved; - BitField<26, 1, u32> does_load_or_store; - BitField<27, 1, u32> does_fp64; - BitField<28, 4, u32> stream_out_mask; - } common0; - - union { - BitField<0, 24, u32> shader_local_memory_low_size; - BitField<24, 8, u32> per_patch_attribute_count; - } common1; - - union { - BitField<0, 24, u32> shader_local_memory_high_size; - BitField<24, 8, u32> threads_per_input_primitive; - } common2; - - union { - BitField<0, 24, u32> shader_local_memory_crs_size; - BitField<24, 4, OutputTopology> output_topology; - BitField<28, 4, u32> reserved; - } common3; - - union { - BitField<0, 12, u32> max_output_vertices; - BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. - BitField<20, 4, u32> reserved; - BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders. - } common4; - - union { - struct { - INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA - INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB - INSERT_PADDING_BYTES_NOINIT(16); // ImapGenericVector[32] - INSERT_PADDING_BYTES_NOINIT(2); // ImapColor - union { - BitField<0, 8, u16> clip_distances; - BitField<8, 1, u16> point_sprite_s; - BitField<9, 1, u16> point_sprite_t; - BitField<10, 1, u16> fog_coordinate; - BitField<12, 1, u16> tessellation_eval_point_u; - BitField<13, 1, u16> tessellation_eval_point_v; - BitField<14, 1, u16> instance_id; - BitField<15, 1, u16> vertex_id; - }; - INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10] - INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved - INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA - INSERT_PADDING_BYTES_NOINIT(1); // OmapSystemValuesB - INSERT_PADDING_BYTES_NOINIT(16); // OmapGenericVector[32] - INSERT_PADDING_BYTES_NOINIT(2); // OmapColor - INSERT_PADDING_BYTES_NOINIT(2); // OmapSystemValuesC - INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10] - INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved - } vtg; - - struct { - INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA - INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB - - union { - BitField<0, 2, PixelImap> x; - BitField<2, 2, PixelImap> y; - BitField<4, 2, PixelImap> z; - BitField<6, 2, PixelImap> w; - u8 raw; - } imap_generic_vector[32]; - - INSERT_PADDING_BYTES_NOINIT(2); // ImapColor - INSERT_PADDING_BYTES_NOINIT(2); // ImapSystemValuesC - INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10] - INSERT_PADDING_BYTES_NOINIT(2); // ImapReserved - - struct { - u32 target; - union { - BitField<0, 1, u32> sample_mask; - BitField<1, 1, u32> depth; - BitField<2, 30, u32> reserved; - }; - } omap; - - bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const { - const u32 bit = render_target * 4 + component; - return omap.target & (1 << bit); - } - - PixelImap 
GetPixelImap(u32 attribute) const { - const auto get_index = [this, attribute](u32 index) { - return static_cast<PixelImap>( - (imap_generic_vector[attribute].raw >> (index * 2)) & 3); - }; - - std::optional<PixelImap> result; - for (u32 component = 0; component < 4; ++component) { - const PixelImap index = get_index(component); - if (index == PixelImap::Unused) { - continue; - } - if (result && result != index) { - LOG_CRITICAL(HW_GPU, "Generic attribute conflict in interpolation mode"); - } - result = index; - } - return result.value_or(PixelImap::Unused); - } - } ps; - - std::array<u32, 0xF> raw; - }; - - u64 GetLocalMemorySize() const { - return (common1.shader_local_memory_low_size | - (common2.shader_local_memory_high_size << 24)); - } -}; -static_assert(sizeof(Header) == 0x50, "Incorrect structure size"); - -} // namespace Tegra::Shader diff --git a/src/video_core/engines/shader_type.h b/src/video_core/engines/shader_type.h deleted file mode 100644 index 49ce5cde5..000000000 --- a/src/video_core/engines/shader_type.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include "common/common_types.h" - -namespace Tegra::Engines { - -enum class ShaderType : u32 { - Vertex = 0, - TesselationControl = 1, - TesselationEval = 2, - Geometry = 3, - Fragment = 4, - Compute = 5, -}; -static constexpr std::size_t MaxShaderTypes = 6; - -} // namespace Tegra::Engines diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp deleted file mode 100644 index f058f2744..000000000 --- a/src/video_core/guest_driver.cpp +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <limits> -#include <vector> - -#include "common/common_types.h" -#include "video_core/guest_driver.h" - -namespace VideoCore { - -void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32> bound_offsets) { - if (texture_handler_size) { - return; - } - const std::size_t size = bound_offsets.size(); - if (size < 2) { - return; - } - std::sort(bound_offsets.begin(), bound_offsets.end(), std::less{}); - u32 min_val = std::numeric_limits<u32>::max(); - for (std::size_t i = 1; i < size; ++i) { - if (bound_offsets[i] == bound_offsets[i - 1]) { - continue; - } - const u32 new_min = bound_offsets[i] - bound_offsets[i - 1]; - min_val = std::min(min_val, new_min); - } - if (min_val > 2) { - return; - } - texture_handler_size = min_texture_handler_size * min_val; -} - -} // namespace VideoCore diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h deleted file mode 100644 index 21e569ba1..000000000 --- a/src/video_core/guest_driver.h +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <optional> -#include <vector> - -#include "common/common_types.h" - -namespace VideoCore { - -/** - * The GuestDriverProfile class is used to learn about the GPU drivers behavior and collect - * information necessary for impossible to avoid HLE methods like shader tracks as they are - * Entscheidungsproblems. 
- */ -class GuestDriverProfile { -public: - explicit GuestDriverProfile() = default; - explicit GuestDriverProfile(std::optional<u32> texture_handler_size_) - : texture_handler_size{texture_handler_size_} {} - - void DeduceTextureHandlerSize(std::vector<u32> bound_offsets); - - u32 GetTextureHandlerSize() const { - return texture_handler_size.value_or(default_texture_handler_size); - } - - bool IsTextureHandlerSizeKnown() const { - return texture_handler_size.has_value(); - } - -private: - // Minimum size of texture handler any driver can use. - static constexpr u32 min_texture_handler_size = 4; - - // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily use 4 bytes instead. - // Thus, certain drivers may squish the size. - static constexpr u32 default_texture_handler_size = 8; - - std::optional<u32> texture_handler_size = default_texture_handler_size; -}; - -} // namespace VideoCore diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index d2b9d5f2b..882eff880 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -69,7 +69,6 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { } else { UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr); } - const auto submapped_ranges = GetSubmappedRange(gpu_addr, size); for (const auto& map : submapped_ranges) { diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 58014c1c3..b094fc064 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -11,7 +11,6 @@ #include "common/common_types.h" #include "video_core/engines/fermi_2d.h" #include "video_core/gpu.h" -#include "video_core/guest_driver.h" namespace Tegra { class MemoryManager; @@ -45,7 +44,7 @@ public: virtual void Clear() = 0; /// Dispatches a compute shader invocation - virtual void DispatchCompute(GPUVAddr code_addr) = 0; + virtual void DispatchCompute() = 0; /// Resets the counter of a query virtual void ResetCounter(QueryType type) = 0; @@ -136,18 +135,5 @@ public: /// Initialize disk cached resources for the game being emulated virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading, const DiskResourceLoadCallback& callback) {} - - /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver. - [[nodiscard]] GuestDriverProfile& AccessGuestDriverProfile() { - return guest_driver_profile; - } - - /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver. - [[nodiscard]] const GuestDriverProfile& AccessGuestDriverProfile() const { - return guest_driver_profile; - } - -private: - GuestDriverProfile guest_driver_profile{}; }; } // namespace VideoCore diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp deleted file mode 100644 index e8d8d2aa5..000000000 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp +++ /dev/null @@ -1,2124 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
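For context on the guest_driver removal above, a minimal sketch of the stride heuristic that GuestDriverProfile::DeduceTextureHandlerSize implemented (the free function below and its return type are illustrative; the deleted code stored the result in an optional member instead of returning it): sort the constant-buffer offsets at which texture handlers were bound, take the smallest nonzero gap between neighbours, and accept it only when that gap is one or two 32-bit words, i.e. a 4- or 8-byte handler.

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <optional>
#include <vector>

std::optional<std::uint32_t> DeduceHandlerSizeBytes(std::vector<std::uint32_t> bound_offsets) {
    if (bound_offsets.size() < 2) {
        return std::nullopt; // a single sample gives no stride to measure
    }
    std::sort(bound_offsets.begin(), bound_offsets.end());
    std::uint32_t min_stride = std::numeric_limits<std::uint32_t>::max();
    for (std::size_t i = 1; i < bound_offsets.size(); ++i) {
        if (bound_offsets[i] == bound_offsets[i - 1]) {
            continue; // duplicate binding, not a stride
        }
        min_stride = std::min(min_stride, bound_offsets[i] - bound_offsets[i - 1]);
    }
    if (min_stride > 2) {
        return std::nullopt; // neighbours too far apart to be adjacent handlers
    }
    return 4 * min_stride; // 4 bytes (Nvidia-style packed) or 8 bytes (the OpenGL/Vulkan default)
}

With offsets {0x30, 0x31, 0x34}, for example, the smallest gap is one word, so the profile would settle on 4-byte handlers; any query made before a successful deduction fell back to the 8-byte default.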
- -#include <algorithm> -#include <array> -#include <cstddef> -#include <string> -#include <string_view> -#include <utility> -#include <variant> - -#include <fmt/format.h> - -#include "common/alignment.h" -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/renderer_opengl/gl_arb_decompiler.h" -#include "video_core/renderer_opengl/gl_device.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -// Predicates in the decompiled code follow the convention that -1 means true and 0 means false. -// GLASM lacks booleans, so they have to be implemented as integers. -// Using -1 for true is useful because both CMP.S and NOT.U can negate it, and CMP.S can be used to -// select between two values, because -1 will be evaluated as true and 0 as false. - -namespace OpenGL { - -namespace { - -using Tegra::Engines::ShaderType; -using Tegra::Shader::Attribute; -using Tegra::Shader::PixelImap; -using Tegra::Shader::Register; -using namespace VideoCommon::Shader; -using Operation = const OperationNode&; - -constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"}; - -char Swizzle(std::size_t component) { - static constexpr std::string_view SWIZZLE{"xyzw"}; - return SWIZZLE.at(component); -} - -constexpr bool IsGenericAttribute(Attribute::Index index) { - return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; -} - -u32 GetGenericAttributeIndex(Attribute::Index index) { - ASSERT(IsGenericAttribute(index)); - return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); -} - -std::string_view Modifiers(Operation operation) { - const auto meta = std::get_if<MetaArithmetic>(&operation.GetMeta()); - if (meta && meta->precise) { - return ".PREC"; - } - return ""; -} - -std::string_view GetInputFlags(PixelImap attribute) { - switch (attribute) { - case PixelImap::Perspective: - return ""; - case PixelImap::Constant: - return "FLAT "; - case PixelImap::ScreenLinear: - return "NOPERSPECTIVE "; - case PixelImap::Unused: - break; - } - UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute); - return {}; -} - -std::string_view ImageType(Tegra::Shader::ImageType image_type) { - switch (image_type) { - case Tegra::Shader::ImageType::Texture1D: - return "1D"; - case Tegra::Shader::ImageType::TextureBuffer: - return "BUFFER"; - case Tegra::Shader::ImageType::Texture1DArray: - return "ARRAY1D"; - case Tegra::Shader::ImageType::Texture2D: - return "2D"; - case Tegra::Shader::ImageType::Texture2DArray: - return "ARRAY2D"; - case Tegra::Shader::ImageType::Texture3D: - return "3D"; - } - UNREACHABLE(); - return {}; -} - -std::string_view StackName(MetaStackClass stack) { - switch (stack) { - case MetaStackClass::Ssy: - return "SSY"; - case MetaStackClass::Pbk: - return "PBK"; - } - UNREACHABLE(); - return ""; -}; - -std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) { - switch (topology) { - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points: - return "POINTS"; - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip: - return "LINES"; - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency: - return "LINES_ADJACENCY"; - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles: - case 
Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan: - return "TRIANGLES"; - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: - return "TRIANGLES_ADJACENCY"; - default: - UNIMPLEMENTED_MSG("topology={}", topology); - return "POINTS"; - } -} - -std::string_view TopologyName(Tegra::Shader::OutputTopology topology) { - switch (topology) { - case Tegra::Shader::OutputTopology::PointList: - return "POINTS"; - case Tegra::Shader::OutputTopology::LineStrip: - return "LINE_STRIP"; - case Tegra::Shader::OutputTopology::TriangleStrip: - return "TRIANGLE_STRIP"; - default: - UNIMPLEMENTED_MSG("Unknown output topology: {}", topology); - return "points"; - } -} - -std::string_view StageInputName(ShaderType stage) { - switch (stage) { - case ShaderType::Vertex: - case ShaderType::Geometry: - return "vertex"; - case ShaderType::Fragment: - return "fragment"; - case ShaderType::Compute: - return "invocation"; - default: - UNREACHABLE(); - return ""; - } -} - -std::string TextureType(const MetaTexture& meta) { - if (meta.sampler.is_buffer) { - return "BUFFER"; - } - std::string type; - if (meta.sampler.is_shadow) { - type += "SHADOW"; - } - if (meta.sampler.is_array) { - type += "ARRAY"; - } - type += [&meta] { - switch (meta.sampler.type) { - case Tegra::Shader::TextureType::Texture1D: - return "1D"; - case Tegra::Shader::TextureType::Texture2D: - return "2D"; - case Tegra::Shader::TextureType::Texture3D: - return "3D"; - case Tegra::Shader::TextureType::TextureCube: - return "CUBE"; - } - UNREACHABLE(); - return "2D"; - }(); - return type; -} - -class ARBDecompiler final { -public: - explicit ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, - ShaderType stage_, std::string_view identifier); - - std::string Code() const { - return shader_source; - } - -private: - void DefineGlobalMemory(); - - void DeclareHeader(); - void DeclareVertex(); - void DeclareGeometry(); - void DeclareFragment(); - void DeclareCompute(); - void DeclareInputAttributes(); - void DeclareOutputAttributes(); - void DeclareLocalMemory(); - void DeclareGlobalMemory(); - void DeclareConstantBuffers(); - void DeclareRegisters(); - void DeclareTemporaries(); - void DeclarePredicates(); - void DeclareInternalFlags(); - - void InitializeVariables(); - - void DecompileAST(); - void DecompileBranchMode(); - - void VisitAST(const ASTNode& node); - std::string VisitExpression(const Expr& node); - - void VisitBlock(const NodeBlock& bb); - - std::string Visit(const Node& node); - - std::tuple<std::string, std::string, std::size_t> BuildCoords(Operation); - std::string BuildAoffi(Operation); - std::string GlobalMemoryPointer(const GmemNode& gmem); - void Exit(); - - std::string Assign(Operation); - std::string Select(Operation); - std::string FClamp(Operation); - std::string FCastHalf0(Operation); - std::string FCastHalf1(Operation); - std::string FSqrt(Operation); - std::string FSwizzleAdd(Operation); - std::string HAdd2(Operation); - std::string HMul2(Operation); - std::string HFma2(Operation); - std::string HAbsolute(Operation); - std::string HNegate(Operation); - std::string HClamp(Operation); - std::string HCastFloat(Operation); - std::string HUnpack(Operation); - std::string HMergeF32(Operation); - std::string HMergeH0(Operation); - std::string HMergeH1(Operation); - std::string HPack2(Operation); - std::string 
LogicalAssign(Operation); - std::string LogicalPick2(Operation); - std::string LogicalAnd2(Operation); - std::string FloatOrdered(Operation); - std::string FloatUnordered(Operation); - std::string LogicalAddCarry(Operation); - std::string Texture(Operation); - std::string TextureGather(Operation); - std::string TextureQueryDimensions(Operation); - std::string TextureQueryLod(Operation); - std::string TexelFetch(Operation); - std::string TextureGradient(Operation); - std::string ImageLoad(Operation); - std::string ImageStore(Operation); - std::string Branch(Operation); - std::string BranchIndirect(Operation); - std::string PushFlowStack(Operation); - std::string PopFlowStack(Operation); - std::string Exit(Operation); - std::string Discard(Operation); - std::string EmitVertex(Operation); - std::string EndPrimitive(Operation); - std::string InvocationId(Operation); - std::string YNegate(Operation); - std::string ThreadId(Operation); - std::string ShuffleIndexed(Operation); - std::string Barrier(Operation); - std::string MemoryBarrierGroup(Operation); - std::string MemoryBarrierGlobal(Operation); - - template <const std::string_view& op> - std::string Unary(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0])); - return temporary; - } - - template <const std::string_view& op> - std::string Binary(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), - Visit(operation[1])); - return temporary; - } - - template <const std::string_view& op> - std::string Trinary(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), - Visit(operation[1]), Visit(operation[2])); - return temporary; - } - - template <const std::string_view& op, bool unordered> - std::string FloatComparison(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("TRUNC.U.CC RC.x, {};", Binary<op>(operation)); - AddLine("MOV.S {}, 0;", temporary); - AddLine("MOV.S {} (NE.x), -1;", temporary); - - const std::string op_a = Visit(operation[0]); - const std::string op_b = Visit(operation[1]); - if constexpr (unordered) { - AddLine("SNE.F RC.x, {}, {};", op_a, op_a); - AddLine("TRUNC.U.CC RC.x, RC.x;"); - AddLine("MOV.S {} (NE.x), -1;", temporary); - AddLine("SNE.F RC.x, {}, {};", op_b, op_b); - AddLine("TRUNC.U.CC RC.x, RC.x;"); - AddLine("MOV.S {} (NE.x), -1;", temporary); - } else if (op == SNE_F) { - AddLine("SNE.F RC.x, {}, {};", op_a, op_a); - AddLine("TRUNC.U.CC RC.x, RC.x;"); - AddLine("MOV.S {} (NE.x), 0;", temporary); - AddLine("SNE.F RC.x, {}, {};", op_b, op_b); - AddLine("TRUNC.U.CC RC.x, RC.x;"); - AddLine("MOV.S {} (NE.x), 0;", temporary); - } - return temporary; - } - - template <const std::string_view& op, bool is_nan> - std::string HalfComparison(Operation operation) { - std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - const std::string op_a = Visit(operation[0]); - const std::string op_b = Visit(operation[1]); - AddLine("UP2H.F {}, {};", tmp1, op_a); - AddLine("UP2H.F {}, {};", tmp2, op_b); - AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2); - AddLine("TRUNC.U.CC RC.xy, {};", tmp1); - AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1); - AddLine("MOV.S {}.x (NE.x), -1;", tmp1); - AddLine("MOV.S {}.y (NE.y), -1;", tmp1); - if constexpr (is_nan) { - AddLine("MOVC.F 
RC.x, {};", op_a); - AddLine("MOV.S {}.x (NAN.x), -1;", tmp1); - AddLine("MOVC.F RC.x, {};", op_b); - AddLine("MOV.S {}.y (NAN.x), -1;", tmp1); - } - return tmp1; - } - - template <const std::string_view& op, const std::string_view& type> - std::string AtomicImage(Operation operation) { - const auto& meta = std::get<MetaImage>(operation.GetMeta()); - const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; - const std::size_t num_coords = operation.GetOperandsCount(); - const std::size_t num_values = meta.values.size(); - - const std::string coord = AllocVectorTemporary(); - const std::string value = AllocVectorTemporary(); - for (std::size_t i = 0; i < num_coords; ++i) { - AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i])); - } - for (std::size_t i = 0; i < num_values; ++i) { - AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); - } - - AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, coord, value, coord, - image_id, ImageType(meta.image.type)); - return fmt::format("{}.x", coord); - } - - template <const std::string_view& op, const std::string_view& type> - std::string Atomic(Operation operation) { - std::string temporary = AllocTemporary(); - std::string address; - std::string_view opname; - bool robust = false; - if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { - address = GlobalMemoryPointer(*gmem); - opname = "ATOM"; - robust = true; - } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { - address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress())); - opname = "ATOMS"; - } else { - UNREACHABLE(); - return "{0, 0, 0, 0}"; - } - if (robust) { - AddLine("IF NE.x;"); - } - AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address); - if (robust) { - AddLine("ELSE;"); - AddLine("MOV.S {}, 0;", temporary); - AddLine("ENDIF;"); - } - return temporary; - } - - template <char type> - std::string Negate(Operation operation) { - std::string temporary = AllocTemporary(); - if constexpr (type == 'F') { - AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0])); - } else { - AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0])); - } - return temporary; - } - - template <char type> - std::string Absolute(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0])); - return temporary; - } - - template <char type> - std::string BitfieldInsert(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[3])); - AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2])); - AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]), - Visit(operation[0])); - return fmt::format("{}.x", temporary); - } - - template <char type> - std::string BitfieldExtract(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2])); - AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1])); - AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0])); - return fmt::format("{}.x", temporary); - } - - template <char swizzle> - std::string LocalInvocationId(Operation) { - return fmt::format("invocation.localid.{}", swizzle); - } - - template <char swizzle> - std::string WorkGroupId(Operation) { - return fmt::format("invocation.groupid.{}", swizzle); - } - - template <char 
c1, char c2> - std::string ThreadMask(Operation) { - return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2); - } - - template <typename... Args> - void AddExpression(std::string_view text, Args&&... args) { - shader_source += fmt::format(fmt::runtime(text), std::forward<Args>(args)...); - } - - template <typename... Args> - void AddLine(std::string_view text, Args&&... args) { - AddExpression(text, std::forward<Args>(args)...); - shader_source += '\n'; - } - - std::string AllocLongVectorTemporary() { - max_long_temporaries = std::max(max_long_temporaries, num_long_temporaries + 1); - return fmt::format("L{}", num_long_temporaries++); - } - - std::string AllocLongTemporary() { - return fmt::format("{}.x", AllocLongVectorTemporary()); - } - - std::string AllocVectorTemporary() { - max_temporaries = std::max(max_temporaries, num_temporaries + 1); - return fmt::format("T{}", num_temporaries++); - } - - std::string AllocTemporary() { - return fmt::format("{}.x", AllocVectorTemporary()); - } - - void ResetTemporaries() noexcept { - num_temporaries = 0; - num_long_temporaries = 0; - } - - const Device& device; - const ShaderIR& ir; - const Registry& registry; - const ShaderType stage; - - std::size_t num_temporaries = 0; - std::size_t max_temporaries = 0; - - std::size_t num_long_temporaries = 0; - std::size_t max_long_temporaries = 0; - - std::map<GlobalMemoryBase, u32> global_memory_names; - - std::string shader_source; - - static constexpr std::string_view ADD_F32 = "ADD.F32"; - static constexpr std::string_view ADD_S = "ADD.S"; - static constexpr std::string_view ADD_U = "ADD.U"; - static constexpr std::string_view MUL_F32 = "MUL.F32"; - static constexpr std::string_view MUL_S = "MUL.S"; - static constexpr std::string_view MUL_U = "MUL.U"; - static constexpr std::string_view DIV_F32 = "DIV.F32"; - static constexpr std::string_view DIV_S = "DIV.S"; - static constexpr std::string_view DIV_U = "DIV.U"; - static constexpr std::string_view MAD_F32 = "MAD.F32"; - static constexpr std::string_view RSQ_F32 = "RSQ.F32"; - static constexpr std::string_view COS_F32 = "COS.F32"; - static constexpr std::string_view SIN_F32 = "SIN.F32"; - static constexpr std::string_view EX2_F32 = "EX2.F32"; - static constexpr std::string_view LG2_F32 = "LG2.F32"; - static constexpr std::string_view SLT_F = "SLT.F32"; - static constexpr std::string_view SLT_S = "SLT.S"; - static constexpr std::string_view SLT_U = "SLT.U"; - static constexpr std::string_view SEQ_F = "SEQ.F32"; - static constexpr std::string_view SEQ_S = "SEQ.S"; - static constexpr std::string_view SEQ_U = "SEQ.U"; - static constexpr std::string_view SLE_F = "SLE.F32"; - static constexpr std::string_view SLE_S = "SLE.S"; - static constexpr std::string_view SLE_U = "SLE.U"; - static constexpr std::string_view SGT_F = "SGT.F32"; - static constexpr std::string_view SGT_S = "SGT.S"; - static constexpr std::string_view SGT_U = "SGT.U"; - static constexpr std::string_view SNE_F = "SNE.F32"; - static constexpr std::string_view SNE_S = "SNE.S"; - static constexpr std::string_view SNE_U = "SNE.U"; - static constexpr std::string_view SGE_F = "SGE.F32"; - static constexpr std::string_view SGE_S = "SGE.S"; - static constexpr std::string_view SGE_U = "SGE.U"; - static constexpr std::string_view AND_S = "AND.S"; - static constexpr std::string_view AND_U = "AND.U"; - static constexpr std::string_view TRUNC_F = "TRUNC.F"; - static constexpr std::string_view TRUNC_S = "TRUNC.S"; - static constexpr std::string_view TRUNC_U = "TRUNC.U"; - static constexpr 
std::string_view SHL_S = "SHL.S"; - static constexpr std::string_view SHL_U = "SHL.U"; - static constexpr std::string_view SHR_S = "SHR.S"; - static constexpr std::string_view SHR_U = "SHR.U"; - static constexpr std::string_view OR_S = "OR.S"; - static constexpr std::string_view OR_U = "OR.U"; - static constexpr std::string_view XOR_S = "XOR.S"; - static constexpr std::string_view XOR_U = "XOR.U"; - static constexpr std::string_view NOT_S = "NOT.S"; - static constexpr std::string_view NOT_U = "NOT.U"; - static constexpr std::string_view BTC_S = "BTC.S"; - static constexpr std::string_view BTC_U = "BTC.U"; - static constexpr std::string_view BTFM_S = "BTFM.S"; - static constexpr std::string_view BTFM_U = "BTFM.U"; - static constexpr std::string_view ROUND_F = "ROUND.F"; - static constexpr std::string_view CEIL_F = "CEIL.F"; - static constexpr std::string_view FLR_F = "FLR.F"; - static constexpr std::string_view I2F_S = "I2F.S"; - static constexpr std::string_view I2F_U = "I2F.U"; - static constexpr std::string_view MIN_F = "MIN.F"; - static constexpr std::string_view MIN_S = "MIN.S"; - static constexpr std::string_view MIN_U = "MIN.U"; - static constexpr std::string_view MAX_F = "MAX.F"; - static constexpr std::string_view MAX_S = "MAX.S"; - static constexpr std::string_view MAX_U = "MAX.U"; - static constexpr std::string_view MOV_U = "MOV.U"; - static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U"; - static constexpr std::string_view TGALL_U = "TGALL.U"; - static constexpr std::string_view TGANY_U = "TGANY.U"; - static constexpr std::string_view TGEQ_U = "TGEQ.U"; - static constexpr std::string_view EXCH = "EXCH"; - static constexpr std::string_view ADD = "ADD"; - static constexpr std::string_view MIN = "MIN"; - static constexpr std::string_view MAX = "MAX"; - static constexpr std::string_view AND = "AND"; - static constexpr std::string_view OR = "OR"; - static constexpr std::string_view XOR = "XOR"; - static constexpr std::string_view U32 = "U32"; - static constexpr std::string_view S32 = "S32"; - - static constexpr std::size_t NUM_ENTRIES = static_cast<std::size_t>(OperationCode::Amount); - using DecompilerType = std::string (ARBDecompiler::*)(Operation); - static constexpr std::array<DecompilerType, NUM_ENTRIES> OPERATION_DECOMPILERS = { - &ARBDecompiler::Assign, - - &ARBDecompiler::Select, - - &ARBDecompiler::Binary<ADD_F32>, - &ARBDecompiler::Binary<MUL_F32>, - &ARBDecompiler::Binary<DIV_F32>, - &ARBDecompiler::Trinary<MAD_F32>, - &ARBDecompiler::Negate<'F'>, - &ARBDecompiler::Absolute<'F'>, - &ARBDecompiler::FClamp, - &ARBDecompiler::FCastHalf0, - &ARBDecompiler::FCastHalf1, - &ARBDecompiler::Binary<MIN_F>, - &ARBDecompiler::Binary<MAX_F>, - &ARBDecompiler::Unary<COS_F32>, - &ARBDecompiler::Unary<SIN_F32>, - &ARBDecompiler::Unary<EX2_F32>, - &ARBDecompiler::Unary<LG2_F32>, - &ARBDecompiler::Unary<RSQ_F32>, - &ARBDecompiler::FSqrt, - &ARBDecompiler::Unary<ROUND_F>, - &ARBDecompiler::Unary<FLR_F>, - &ARBDecompiler::Unary<CEIL_F>, - &ARBDecompiler::Unary<TRUNC_F>, - &ARBDecompiler::Unary<I2F_S>, - &ARBDecompiler::Unary<I2F_U>, - &ARBDecompiler::FSwizzleAdd, - - &ARBDecompiler::Binary<ADD_S>, - &ARBDecompiler::Binary<MUL_S>, - &ARBDecompiler::Binary<DIV_S>, - &ARBDecompiler::Negate<'S'>, - &ARBDecompiler::Absolute<'S'>, - &ARBDecompiler::Binary<MIN_S>, - &ARBDecompiler::Binary<MAX_S>, - - &ARBDecompiler::Unary<TRUNC_S>, - &ARBDecompiler::Unary<MOV_U>, - &ARBDecompiler::Binary<SHL_S>, - &ARBDecompiler::Binary<SHR_U>, - &ARBDecompiler::Binary<SHR_S>, - &ARBDecompiler::Binary<AND_S>, 
- &ARBDecompiler::Binary<OR_S>, - &ARBDecompiler::Binary<XOR_S>, - &ARBDecompiler::Unary<NOT_S>, - &ARBDecompiler::BitfieldInsert<'S'>, - &ARBDecompiler::BitfieldExtract<'S'>, - &ARBDecompiler::Unary<BTC_S>, - &ARBDecompiler::Unary<BTFM_S>, - - &ARBDecompiler::Binary<ADD_U>, - &ARBDecompiler::Binary<MUL_U>, - &ARBDecompiler::Binary<DIV_U>, - &ARBDecompiler::Binary<MIN_U>, - &ARBDecompiler::Binary<MAX_U>, - &ARBDecompiler::Unary<TRUNC_U>, - &ARBDecompiler::Unary<MOV_U>, - &ARBDecompiler::Binary<SHL_U>, - &ARBDecompiler::Binary<SHR_U>, - &ARBDecompiler::Binary<SHR_U>, - &ARBDecompiler::Binary<AND_U>, - &ARBDecompiler::Binary<OR_U>, - &ARBDecompiler::Binary<XOR_U>, - &ARBDecompiler::Unary<NOT_U>, - &ARBDecompiler::BitfieldInsert<'U'>, - &ARBDecompiler::BitfieldExtract<'U'>, - &ARBDecompiler::Unary<BTC_U>, - &ARBDecompiler::Unary<BTFM_U>, - - &ARBDecompiler::HAdd2, - &ARBDecompiler::HMul2, - &ARBDecompiler::HFma2, - &ARBDecompiler::HAbsolute, - &ARBDecompiler::HNegate, - &ARBDecompiler::HClamp, - &ARBDecompiler::HCastFloat, - &ARBDecompiler::HUnpack, - &ARBDecompiler::HMergeF32, - &ARBDecompiler::HMergeH0, - &ARBDecompiler::HMergeH1, - &ARBDecompiler::HPack2, - - &ARBDecompiler::LogicalAssign, - &ARBDecompiler::Binary<AND_U>, - &ARBDecompiler::Binary<OR_U>, - &ARBDecompiler::Binary<XOR_U>, - &ARBDecompiler::Unary<NOT_U>, - &ARBDecompiler::LogicalPick2, - &ARBDecompiler::LogicalAnd2, - - &ARBDecompiler::FloatComparison<SLT_F, false>, - &ARBDecompiler::FloatComparison<SEQ_F, false>, - &ARBDecompiler::FloatComparison<SLE_F, false>, - &ARBDecompiler::FloatComparison<SGT_F, false>, - &ARBDecompiler::FloatComparison<SNE_F, false>, - &ARBDecompiler::FloatComparison<SGE_F, false>, - &ARBDecompiler::FloatOrdered, - &ARBDecompiler::FloatUnordered, - &ARBDecompiler::FloatComparison<SLT_F, true>, - &ARBDecompiler::FloatComparison<SEQ_F, true>, - &ARBDecompiler::FloatComparison<SLE_F, true>, - &ARBDecompiler::FloatComparison<SGT_F, true>, - &ARBDecompiler::FloatComparison<SNE_F, true>, - &ARBDecompiler::FloatComparison<SGE_F, true>, - - &ARBDecompiler::Binary<SLT_S>, - &ARBDecompiler::Binary<SEQ_S>, - &ARBDecompiler::Binary<SLE_S>, - &ARBDecompiler::Binary<SGT_S>, - &ARBDecompiler::Binary<SNE_S>, - &ARBDecompiler::Binary<SGE_S>, - - &ARBDecompiler::Binary<SLT_U>, - &ARBDecompiler::Binary<SEQ_U>, - &ARBDecompiler::Binary<SLE_U>, - &ARBDecompiler::Binary<SGT_U>, - &ARBDecompiler::Binary<SNE_U>, - &ARBDecompiler::Binary<SGE_U>, - - &ARBDecompiler::LogicalAddCarry, - - &ARBDecompiler::HalfComparison<SLT_F, false>, - &ARBDecompiler::HalfComparison<SEQ_F, false>, - &ARBDecompiler::HalfComparison<SLE_F, false>, - &ARBDecompiler::HalfComparison<SGT_F, false>, - &ARBDecompiler::HalfComparison<SNE_F, false>, - &ARBDecompiler::HalfComparison<SGE_F, false>, - &ARBDecompiler::HalfComparison<SLT_F, true>, - &ARBDecompiler::HalfComparison<SEQ_F, true>, - &ARBDecompiler::HalfComparison<SLE_F, true>, - &ARBDecompiler::HalfComparison<SGT_F, true>, - &ARBDecompiler::HalfComparison<SNE_F, true>, - &ARBDecompiler::HalfComparison<SGE_F, true>, - - &ARBDecompiler::Texture, - &ARBDecompiler::Texture, - &ARBDecompiler::TextureGather, - &ARBDecompiler::TextureQueryDimensions, - &ARBDecompiler::TextureQueryLod, - &ARBDecompiler::TexelFetch, - &ARBDecompiler::TextureGradient, - - &ARBDecompiler::ImageLoad, - &ARBDecompiler::ImageStore, - - &ARBDecompiler::AtomicImage<ADD, U32>, - &ARBDecompiler::AtomicImage<AND, U32>, - &ARBDecompiler::AtomicImage<OR, U32>, - &ARBDecompiler::AtomicImage<XOR, U32>, - &ARBDecompiler::AtomicImage<EXCH, 
U32>, - - &ARBDecompiler::Atomic<EXCH, U32>, - &ARBDecompiler::Atomic<ADD, U32>, - &ARBDecompiler::Atomic<MIN, U32>, - &ARBDecompiler::Atomic<MAX, U32>, - &ARBDecompiler::Atomic<AND, U32>, - &ARBDecompiler::Atomic<OR, U32>, - &ARBDecompiler::Atomic<XOR, U32>, - - &ARBDecompiler::Atomic<EXCH, S32>, - &ARBDecompiler::Atomic<ADD, S32>, - &ARBDecompiler::Atomic<MIN, S32>, - &ARBDecompiler::Atomic<MAX, S32>, - &ARBDecompiler::Atomic<AND, S32>, - &ARBDecompiler::Atomic<OR, S32>, - &ARBDecompiler::Atomic<XOR, S32>, - - &ARBDecompiler::Atomic<ADD, U32>, - &ARBDecompiler::Atomic<MIN, U32>, - &ARBDecompiler::Atomic<MAX, U32>, - &ARBDecompiler::Atomic<AND, U32>, - &ARBDecompiler::Atomic<OR, U32>, - &ARBDecompiler::Atomic<XOR, U32>, - - &ARBDecompiler::Atomic<ADD, S32>, - &ARBDecompiler::Atomic<MIN, S32>, - &ARBDecompiler::Atomic<MAX, S32>, - &ARBDecompiler::Atomic<AND, S32>, - &ARBDecompiler::Atomic<OR, S32>, - &ARBDecompiler::Atomic<XOR, S32>, - - &ARBDecompiler::Branch, - &ARBDecompiler::BranchIndirect, - &ARBDecompiler::PushFlowStack, - &ARBDecompiler::PopFlowStack, - &ARBDecompiler::Exit, - &ARBDecompiler::Discard, - - &ARBDecompiler::EmitVertex, - &ARBDecompiler::EndPrimitive, - - &ARBDecompiler::InvocationId, - &ARBDecompiler::YNegate, - &ARBDecompiler::LocalInvocationId<'x'>, - &ARBDecompiler::LocalInvocationId<'y'>, - &ARBDecompiler::LocalInvocationId<'z'>, - &ARBDecompiler::WorkGroupId<'x'>, - &ARBDecompiler::WorkGroupId<'y'>, - &ARBDecompiler::WorkGroupId<'z'>, - - &ARBDecompiler::Unary<TGBALLOT_U>, - &ARBDecompiler::Unary<TGALL_U>, - &ARBDecompiler::Unary<TGANY_U>, - &ARBDecompiler::Unary<TGEQ_U>, - - &ARBDecompiler::ThreadId, - &ARBDecompiler::ThreadMask<'e', 'q'>, - &ARBDecompiler::ThreadMask<'g', 'e'>, - &ARBDecompiler::ThreadMask<'g', 't'>, - &ARBDecompiler::ThreadMask<'l', 'e'>, - &ARBDecompiler::ThreadMask<'l', 't'>, - &ARBDecompiler::ShuffleIndexed, - - &ARBDecompiler::Barrier, - &ARBDecompiler::MemoryBarrierGroup, - &ARBDecompiler::MemoryBarrierGlobal, - }; -}; - -ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, - ShaderType stage_, std::string_view identifier) - : device{device_}, ir{ir_}, registry{registry_}, stage{stage_} { - DefineGlobalMemory(); - - AddLine("TEMP RC;"); - AddLine("TEMP FSWZA[4];"); - AddLine("TEMP FSWZB[4];"); - if (ir.IsDecompiled()) { - DecompileAST(); - } else { - DecompileBranchMode(); - } - AddLine("END"); - - const std::string code = std::move(shader_source); - DeclareHeader(); - DeclareVertex(); - DeclareGeometry(); - DeclareFragment(); - DeclareCompute(); - DeclareInputAttributes(); - DeclareOutputAttributes(); - DeclareLocalMemory(); - DeclareGlobalMemory(); - DeclareConstantBuffers(); - DeclareRegisters(); - DeclareTemporaries(); - DeclarePredicates(); - DeclareInternalFlags(); - - shader_source += code; -} - -std::string_view HeaderStageName(ShaderType stage) { - switch (stage) { - case ShaderType::Vertex: - return "vp"; - case ShaderType::Geometry: - return "gp"; - case ShaderType::Fragment: - return "fp"; - case ShaderType::Compute: - return "cp"; - default: - UNREACHABLE(); - return ""; - } -} - -void ARBDecompiler::DefineGlobalMemory() { - u32 binding = 0; - for (const auto& pair : ir.GetGlobalMemory()) { - const GlobalMemoryBase base = pair.first; - global_memory_names.emplace(base, binding); - ++binding; - } -} - -void ARBDecompiler::DeclareHeader() { - AddLine("!!NV{}5.0", HeaderStageName(stage)); - // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D - 
AddLine("OPTION NV_internal;"); - AddLine("OPTION NV_gpu_program_fp64;"); - AddLine("OPTION NV_shader_thread_group;"); - if (ir.UsesWarps() && device.HasWarpIntrinsics()) { - AddLine("OPTION NV_shader_thread_shuffle;"); - } - if (stage == ShaderType::Vertex) { - if (device.HasNvViewportArray2()) { - AddLine("OPTION NV_viewport_array2;"); - } - } - if (stage == ShaderType::Fragment) { - AddLine("OPTION ARB_draw_buffers;"); - } - if (device.HasImageLoadFormatted()) { - AddLine("OPTION EXT_shader_image_load_formatted;"); - } -} - -void ARBDecompiler::DeclareVertex() { - if (stage != ShaderType::Vertex) { - return; - } - AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};"); -} - -void ARBDecompiler::DeclareGeometry() { - if (stage != ShaderType::Geometry) { - return; - } - const auto& info = registry.GetGraphicsInfo(); - const auto& header = ir.GetHeader(); - AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology)); - AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology)); - AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value()); - AddLine("ATTRIB vertex_position = vertex.position;"); -} - -void ARBDecompiler::DeclareFragment() { - if (stage != ShaderType::Fragment) { - return; - } - AddLine("OUTPUT result_color7 = result.color[7];"); - AddLine("OUTPUT result_color6 = result.color[6];"); - AddLine("OUTPUT result_color5 = result.color[5];"); - AddLine("OUTPUT result_color4 = result.color[4];"); - AddLine("OUTPUT result_color3 = result.color[3];"); - AddLine("OUTPUT result_color2 = result.color[2];"); - AddLine("OUTPUT result_color1 = result.color[1];"); - AddLine("OUTPUT result_color0 = result.color;"); -} - -void ARBDecompiler::DeclareCompute() { - if (stage != ShaderType::Compute) { - return; - } - const ComputeInfo& info = registry.GetComputeInfo(); - AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1], - info.workgroup_size[2]); - if (info.shared_memory_size_in_words == 0) { - return; - } - const u32 limit = device.GetMaxComputeSharedMemorySize(); - u32 size_in_bytes = info.shared_memory_size_in_words * 4; - if (size_in_bytes > limit) { - LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", - size_in_bytes, limit); - size_in_bytes = limit; - } - - AddLine("SHARED_MEMORY {};", size_in_bytes); - AddLine("SHARED shared_mem[] = {{program.sharedmem}};"); -} - -void ARBDecompiler::DeclareInputAttributes() { - if (stage == ShaderType::Compute) { - return; - } - const std::string_view stage_name = StageInputName(stage); - for (const auto attribute : ir.GetInputAttributes()) { - if (!IsGenericAttribute(attribute)) { - continue; - } - const u32 index = GetGenericAttributeIndex(attribute); - - std::string_view suffix; - if (stage == ShaderType::Fragment) { - const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)}; - if (input_mode == PixelImap::Unused) { - return; - } - suffix = GetInputFlags(input_mode); - } - AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index, - index); - } -} - -void ARBDecompiler::DeclareOutputAttributes() { - if (stage == ShaderType::Compute) { - return; - } - for (const auto attribute : ir.GetOutputAttributes()) { - if (!IsGenericAttribute(attribute)) { - continue; - } - const u32 index = GetGenericAttributeIndex(attribute); - AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index); - } -} - -void ARBDecompiler::DeclareLocalMemory() { - u64 size = 0; - if (stage == ShaderType::Compute) { - size 
= registry.GetComputeInfo().local_memory_size_in_words * 4ULL; - } else { - size = ir.GetHeader().GetLocalMemorySize(); - } - if (size == 0) { - return; - } - const u64 element_count = Common::AlignUp(size, 4) / 4; - AddLine("TEMP lmem[{}];", element_count); -} - -void ARBDecompiler::DeclareGlobalMemory() { - const size_t num_entries = ir.GetGlobalMemory().size(); - if (num_entries > 0) { - AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1); - } -} - -void ARBDecompiler::DeclareConstantBuffers() { - u32 binding = 0; - for (const auto& cbuf : ir.GetConstantBuffers()) { - AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding); - ++binding; - } -} - -void ARBDecompiler::DeclareRegisters() { - for (const u32 gpr : ir.GetRegisters()) { - AddLine("TEMP R{};", gpr); - } -} - -void ARBDecompiler::DeclareTemporaries() { - for (std::size_t i = 0; i < max_temporaries; ++i) { - AddLine("TEMP T{};", i); - } - for (std::size_t i = 0; i < max_long_temporaries; ++i) { - AddLine("LONG TEMP L{};", i); - } -} - -void ARBDecompiler::DeclarePredicates() { - for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { - AddLine("TEMP P{};", static_cast<u64>(pred)); - } -} - -void ARBDecompiler::DeclareInternalFlags() { - for (const char* name : INTERNAL_FLAG_NAMES) { - AddLine("TEMP {};", name); - } -} - -void ARBDecompiler::InitializeVariables() { - AddLine("MOV.F32 FSWZA[0], -1;"); - AddLine("MOV.F32 FSWZA[1], 1;"); - AddLine("MOV.F32 FSWZA[2], -1;"); - AddLine("MOV.F32 FSWZA[3], 0;"); - AddLine("MOV.F32 FSWZB[0], -1;"); - AddLine("MOV.F32 FSWZB[1], -1;"); - AddLine("MOV.F32 FSWZB[2], 1;"); - AddLine("MOV.F32 FSWZB[3], -1;"); - - if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) { - AddLine("MOV.F result.position, {{0, 0, 0, 1}};"); - } - for (const auto attribute : ir.GetOutputAttributes()) { - if (!IsGenericAttribute(attribute)) { - continue; - } - const u32 index = GetGenericAttributeIndex(attribute); - AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index); - } - for (const u32 gpr : ir.GetRegisters()) { - AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr); - } - for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { - AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast<u64>(pred)); - } -} - -void ARBDecompiler::DecompileAST() { - const u32 num_flow_variables = ir.GetASTNumVariables(); - for (u32 i = 0; i < num_flow_variables; ++i) { - AddLine("TEMP F{};", i); - } - for (u32 i = 0; i < num_flow_variables; ++i) { - AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i); - } - - InitializeVariables(); - - VisitAST(ir.GetASTProgram()); -} - -void ARBDecompiler::DecompileBranchMode() { - static constexpr u32 FLOW_STACK_SIZE = 20; - if (!ir.IsFlowStackDisabled()) { - AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE); - AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE); - AddLine("TEMP SSY_TOP;"); - AddLine("TEMP PBK_TOP;"); - } - - AddLine("TEMP PC;"); - - if (!ir.IsFlowStackDisabled()) { - AddLine("MOV.U SSY_TOP.x, 0;"); - AddLine("MOV.U PBK_TOP.x, 0;"); - } - - InitializeVariables(); - - const auto basic_block_end = ir.GetBasicBlocks().end(); - auto basic_block_it = ir.GetBasicBlocks().begin(); - const u32 first_address = basic_block_it->first; - AddLine("MOV.U PC.x, {};", first_address); - - AddLine("REP;"); - - std::size_t num_blocks = 0; - while (basic_block_it != basic_block_end) { - const auto& [address, bb] = *basic_block_it; - ++num_blocks; - - AddLine("SEQ.S.CC RC.x, PC.x, {};", address); - AddLine("IF NE.x;"); - - VisitBlock(bb); - - ++basic_block_it; - - if 
(basic_block_it != basic_block_end) { - const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]); - if (!op || op->GetCode() != OperationCode::Branch) { - const u32 next_address = basic_block_it->first; - AddLine("MOV.U PC.x, {};", next_address); - AddLine("CONT;"); - } - } - - AddLine("ELSE;"); - } - AddLine("RET;"); - while (num_blocks--) { - AddLine("ENDIF;"); - } - - AddLine("ENDREP;"); -} - -void ARBDecompiler::VisitAST(const ASTNode& node) { - if (const auto ast = std::get_if<ASTProgram>(&*node->GetInnerData())) { - for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { - VisitAST(current); - } - } else if (const auto if_then = std::get_if<ASTIfThen>(&*node->GetInnerData())) { - const std::string condition = VisitExpression(if_then->condition); - ResetTemporaries(); - - AddLine("MOVC.U RC.x, {};", condition); - AddLine("IF NE.x;"); - for (ASTNode current = if_then->nodes.GetFirst(); current; current = current->GetNext()) { - VisitAST(current); - } - AddLine("ENDIF;"); - } else if (const auto if_else = std::get_if<ASTIfElse>(&*node->GetInnerData())) { - AddLine("ELSE;"); - for (ASTNode current = if_else->nodes.GetFirst(); current; current = current->GetNext()) { - VisitAST(current); - } - } else if (const auto decoded = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) { - VisitBlock(decoded->nodes); - } else if (const auto var_set = std::get_if<ASTVarSet>(&*node->GetInnerData())) { - AddLine("MOV.U F{}, {};", var_set->index, VisitExpression(var_set->condition)); - ResetTemporaries(); - } else if (const auto do_while = std::get_if<ASTDoWhile>(&*node->GetInnerData())) { - const std::string condition = VisitExpression(do_while->condition); - ResetTemporaries(); - AddLine("REP;"); - for (ASTNode current = do_while->nodes.GetFirst(); current; current = current->GetNext()) { - VisitAST(current); - } - AddLine("MOVC.U RC.x, {};", condition); - AddLine("BRK (NE.x);"); - AddLine("ENDREP;"); - } else if (const auto ast_return = std::get_if<ASTReturn>(&*node->GetInnerData())) { - const bool is_true = ExprIsTrue(ast_return->condition); - if (!is_true) { - AddLine("MOVC.U RC.x, {};", VisitExpression(ast_return->condition)); - AddLine("IF NE.x;"); - ResetTemporaries(); - } - if (ast_return->kills) { - AddLine("KIL TR;"); - } else { - Exit(); - } - if (!is_true) { - AddLine("ENDIF;"); - } - } else if (const auto ast_break = std::get_if<ASTBreak>(&*node->GetInnerData())) { - if (ExprIsTrue(ast_break->condition)) { - AddLine("BRK;"); - } else { - AddLine("MOVC.U RC.x, {};", VisitExpression(ast_break->condition)); - AddLine("BRK (NE.x);"); - ResetTemporaries(); - } - } else if (std::holds_alternative<ASTLabel>(*node->GetInnerData())) { - // Nothing to do - } else { - UNREACHABLE(); - } -} - -std::string ARBDecompiler::VisitExpression(const Expr& node) { - if (const auto expr = std::get_if<ExprAnd>(&*node)) { - std::string result = AllocTemporary(); - AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1), - VisitExpression(expr->operand2)); - return result; - } - if (const auto expr = std::get_if<ExprOr>(&*node)) { - std::string result = AllocTemporary(); - AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1), - VisitExpression(expr->operand2)); - return result; - } - if (const auto expr = std::get_if<ExprNot>(&*node)) { - std::string result = AllocTemporary(); - AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1)); - return result; - } - if (const auto expr = std::get_if<ExprPredicate>(&*node)) { - return 
fmt::format("P{}.x", static_cast<u64>(expr->predicate)); - } - if (const auto expr = std::get_if<ExprCondCode>(&*node)) { - return Visit(ir.GetConditionCode(expr->cc)); - } - if (const auto expr = std::get_if<ExprVar>(&*node)) { - return fmt::format("F{}.x", expr->var_index); - } - if (const auto expr = std::get_if<ExprBoolean>(&*node)) { - return expr->value ? "0xffffffff" : "0"; - } - if (const auto expr = std::get_if<ExprGprEqual>(&*node)) { - std::string result = AllocTemporary(); - AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value); - return result; - } - UNREACHABLE(); - return "0"; -} - -void ARBDecompiler::VisitBlock(const NodeBlock& bb) { - for (const auto& node : bb) { - Visit(node); - } -} - -std::string ARBDecompiler::Visit(const Node& node) { - if (const auto operation = std::get_if<OperationNode>(&*node)) { - if (const auto amend_index = operation->GetAmendIndex()) { - Visit(ir.GetAmendNode(*amend_index)); - } - const std::size_t index = static_cast<std::size_t>(operation->GetCode()); - if (index >= OPERATION_DECOMPILERS.size()) { - UNREACHABLE_MSG("Out of bounds operation: {}", index); - return {}; - } - const auto decompiler = OPERATION_DECOMPILERS[index]; - if (decompiler == nullptr) { - UNREACHABLE_MSG("Undefined operation: {}", index); - return {}; - } - return (this->*decompiler)(*operation); - } - - if (const auto gpr = std::get_if<GprNode>(&*node)) { - const u32 index = gpr->GetIndex(); - if (index == Register::ZeroIndex) { - return "{0, 0, 0, 0}.x"; - } - return fmt::format("R{}.x", index); - } - - if (const auto cv = std::get_if<CustomVarNode>(&*node)) { - return fmt::format("CV{}.x", cv->GetIndex()); - } - - if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { - std::string temporary = AllocTemporary(); - AddLine("MOV.U {}, {};", temporary, immediate->GetValue()); - return temporary; - } - - if (const auto predicate = std::get_if<PredicateNode>(&*node)) { - std::string temporary = AllocTemporary(); - switch (const auto index = predicate->GetIndex(); index) { - case Tegra::Shader::Pred::UnusedIndex: - AddLine("MOV.S {}, -1;", temporary); - break; - case Tegra::Shader::Pred::NeverExecute: - AddLine("MOV.S {}, 0;", temporary); - break; - default: - AddLine("MOV.S {}, P{}.x;", temporary, static_cast<u64>(index)); - break; - } - if (predicate->IsNegated()) { - AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary); - } - return temporary; - } - - if (const auto abuf = std::get_if<AbufNode>(&*node)) { - if (abuf->IsPhysicalBuffer()) { - UNIMPLEMENTED_MSG("Physical buffers are not implemented"); - return "{0, 0, 0, 0}.x"; - } - - const Attribute::Index index = abuf->GetIndex(); - const u32 element = abuf->GetElement(); - const char swizzle = Swizzle(element); - switch (index) { - case Attribute::Index::Position: { - if (stage == ShaderType::Geometry) { - return fmt::format("{}_position[{}].{}", StageInputName(stage), - Visit(abuf->GetBuffer()), swizzle); - } else { - return fmt::format("{}.position.{}", StageInputName(stage), swizzle); - } - } - case Attribute::Index::TessCoordInstanceIDVertexID: - ASSERT(stage == ShaderType::Vertex); - switch (element) { - case 2: - return "vertex.instance"; - case 3: - return "vertex.id"; - } - UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); - break; - case Attribute::Index::PointCoord: - switch (element) { - case 0: - return "fragment.pointcoord.x"; - case 1: - return "fragment.pointcoord.y"; - } - UNIMPLEMENTED(); - break; - case Attribute::Index::FrontFacing: { - ASSERT(stage == 
ShaderType::Fragment); - ASSERT(element == 3); - const std::string temporary = AllocVectorTemporary(); - AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};"); - AddLine("MOV.U.CC RC.x, -RC;"); - AddLine("MOV.S {}.x, 0;", temporary); - AddLine("MOV.S {}.x (NE.x), -1;", temporary); - return fmt::format("{}.x", temporary); - } - default: - if (IsGenericAttribute(index)) { - if (stage == ShaderType::Geometry) { - return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index), - Visit(abuf->GetBuffer()), swizzle); - } else { - return fmt::format("{}.attrib[{}].{}", StageInputName(stage), - GetGenericAttributeIndex(index), swizzle); - } - } - UNIMPLEMENTED_MSG("Unimplemented input attribute={}", index); - break; - } - return "{0, 0, 0, 0}.x"; - } - - if (const auto cbuf = std::get_if<CbufNode>(&*node)) { - std::string offset_string; - const auto& offset = cbuf->GetOffset(); - if (const auto imm = std::get_if<ImmediateNode>(&*offset)) { - offset_string = std::to_string(imm->GetValue()); - } else { - offset_string = Visit(offset); - } - std::string temporary = AllocTemporary(); - AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string); - return temporary; - } - - if (const auto gmem = std::get_if<GmemNode>(&*node)) { - std::string temporary = AllocTemporary(); - AddLine("MOV {}, 0;", temporary); - AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem)); - return temporary; - } - - if (const auto lmem = std::get_if<LmemNode>(&*node)) { - std::string temporary = Visit(lmem->GetAddress()); - AddLine("SHR.U {}, {}, 2;", temporary, temporary); - AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary); - return temporary; - } - - if (const auto smem = std::get_if<SmemNode>(&*node)) { - std::string temporary = Visit(smem->GetAddress()); - AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary); - return temporary; - } - - if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { - const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag()); - return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); - } - - if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { - if (const auto amend_index = conditional->GetAmendIndex()) { - Visit(ir.GetAmendNode(*amend_index)); - } - AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition())); - AddLine("IF NE.x;"); - VisitBlock(conditional->GetCode()); - AddLine("ENDIF;"); - return {}; - } - - if ([[maybe_unused]] const auto cmt = std::get_if<CommentNode>(&*node)) { - // Uncommenting this will generate invalid code. GLASM lacks comments. 
- // AddLine("// {}", cmt->GetText()); - return {}; - } - - UNIMPLEMENTED(); - return {}; -} - -std::tuple<std::string, std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - UNIMPLEMENTED_IF(meta.sampler.is_indexed); - - const bool is_extended = meta.sampler.is_shadow && meta.sampler.is_array && - meta.sampler.type == Tegra::Shader::TextureType::TextureCube; - const std::size_t count = operation.GetOperandsCount(); - std::string temporary = AllocVectorTemporary(); - std::size_t i = 0; - for (; i < count; ++i) { - AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); - } - if (meta.sampler.is_array) { - AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i), Visit(meta.array)); - ++i; - } - if (meta.sampler.is_shadow) { - std::string compare = Visit(meta.depth_compare); - if (is_extended) { - ASSERT(i == 4); - std::string extra_coord = AllocVectorTemporary(); - AddLine("MOV.F {}.x, {};", extra_coord, compare); - return {fmt::format("{}, {}", temporary, extra_coord), extra_coord, 0}; - } - AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), compare); - ++i; - } - return {temporary, temporary, i}; -} - -std::string ARBDecompiler::BuildAoffi(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - if (meta.aoffi.empty()) { - return {}; - } - const std::string temporary = AllocVectorTemporary(); - std::size_t i = 0; - for (auto& node : meta.aoffi) { - AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node)); - } - return fmt::format(", offset({})", temporary); -} - -std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) { - // Read a bindless SSBO, return its address and set CC accordingly - // address = c[binding].xy - // length = c[binding].z - const u32 binding = global_memory_names.at(gmem.GetDescriptor()); - - const std::string pointer = AllocLongVectorTemporary(); - std::string temporary = AllocTemporary(); - - AddLine("PK64.U {}, c[{}];", pointer, binding); - AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()), - Visit(gmem.GetBaseAddress())); - AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary); - AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer); - // Compare offset to length and set CC - AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding); - return fmt::format("{}.x", pointer); -} - -void ARBDecompiler::Exit() { - if (stage != ShaderType::Fragment) { - AddLine("RET;"); - return; - } - - const auto safe_get_register = [this](u32 reg) -> std::string { - if (ir.GetRegisters().contains(reg)) { - return fmt::format("R{}.x", reg); - } - return "{0, 0, 0, 0}.x"; - }; - - const auto& header = ir.GetHeader(); - u32 current_reg = 0; - for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) { - for (u32 component = 0; component < 4; ++component) { - if (!header.ps.IsColorComponentOutputEnabled(rt, component)) { - continue; - } - AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component), - safe_get_register(current_reg)); - ++current_reg; - } - } - if (header.ps.omap.depth) { - AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1)); - } - - AddLine("RET;"); -} - -std::string ARBDecompiler::Assign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - std::string dest_name; - if (const auto gpr = std::get_if<GprNode>(&*dest)) { - if (gpr->GetIndex() == Register::ZeroIndex) { - // Writing to Register::ZeroIndex is a no 
op - return {}; - } - dest_name = fmt::format("R{}.x", gpr->GetIndex()); - } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { - const u32 element = abuf->GetElement(); - const char swizzle = Swizzle(element); - switch (const Attribute::Index index = abuf->GetIndex()) { - case Attribute::Index::Position: - dest_name = fmt::format("result.position.{}", swizzle); - break; - case Attribute::Index::LayerViewportPointSize: - switch (element) { - case 0: - UNIMPLEMENTED(); - return {}; - case 1: - case 2: - if (!device.HasNvViewportArray2()) { - LOG_ERROR( - Render_OpenGL, - "NV_viewport_array2 is missing. Maxwell gen 2 or better is required."); - return {}; - } - dest_name = element == 1 ? "result.layer.x" : "result.viewport.x"; - break; - case 3: - dest_name = "result.pointsize.x"; - break; - } - break; - case Attribute::Index::ClipDistances0123: - dest_name = fmt::format("result.clip[{}].x", element); - break; - case Attribute::Index::ClipDistances4567: - dest_name = fmt::format("result.clip[{}].x", element + 4); - break; - default: - if (!IsGenericAttribute(index)) { - UNREACHABLE(); - return {}; - } - dest_name = - fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle); - break; - } - } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { - const std::string address = Visit(lmem->GetAddress()); - AddLine("SHR.U {}, {}, 2;", address, address); - dest_name = fmt::format("lmem[{}].x", address); - } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { - AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress())); - ResetTemporaries(); - return {}; - } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { - AddLine("IF NE.x;"); - AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem)); - AddLine("ENDIF;"); - ResetTemporaries(); - return {}; - } else { - UNREACHABLE(); - ResetTemporaries(); - return {}; - } - - AddLine("MOV.U {}, {};", dest_name, Visit(src)); - ResetTemporaries(); - return {}; -} - -std::string ARBDecompiler::Select(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]), - Visit(operation[2])); - return temporary; -} - -std::string ARBDecompiler::FClamp(Operation operation) { - // 1.0f in hex, replace with std::bit_cast on C++20 - static constexpr u32 POSITIVE_ONE = 0x3f800000; - - std::string temporary = AllocTemporary(); - const Node& value = operation[0]; - const Node& low = operation[1]; - const Node& high = operation[2]; - const auto* const imm_low = std::get_if<ImmediateNode>(&*low); - const auto* const imm_high = std::get_if<ImmediateNode>(&*high); - if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) { - AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value)); - } else { - AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high)); - AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low)); - } - return temporary; -} - -std::string ARBDecompiler::FCastHalf0(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0])); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::FCastHalf1(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0])); - AddLine("MOV {}.x, {}.y;", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string 
ARBDecompiler::FSqrt(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0])); - AddLine("RCP.F32 {}, {};", temporary, temporary); - return temporary; -} - -std::string ARBDecompiler::FSwizzleAdd(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, - "NV_shader_thread_shuffle is missing. Kepler or better is required."); - AddLine("ADD.F {}.x, {}, {};", temporary, Visit(operation[0]), Visit(operation[1])); - return fmt::format("{}.x", temporary); - } - - AddLine("AND.U {}.z, {}.threadid, 3;", temporary, StageInputName(stage)); - AddLine("SHL.U {}.z, {}.z, 1;", temporary, temporary); - AddLine("SHR.U {}.z, {}, {}.z;", temporary, Visit(operation[2]), temporary); - AddLine("AND.U {}.z, {}.z, 3;", temporary, temporary); - AddLine("MUL.F32 {}.x, {}, FSWZA[{}.z];", temporary, Visit(operation[0]), temporary); - AddLine("MUL.F32 {}.y, {}, FSWZB[{}.z];", temporary, Visit(operation[1]), temporary); - AddLine("ADD.F32 {}.x, {}.x, {}.y;", temporary, temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HAdd2(Operation operation) { - const std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); - AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); - AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2); - AddLine("PK2H.F {}.x, {};", tmp1, tmp1); - return fmt::format("{}.x", tmp1); -} - -std::string ARBDecompiler::HMul2(Operation operation) { - const std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); - AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); - AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2); - AddLine("PK2H.F {}.x, {};", tmp1, tmp1); - return fmt::format("{}.x", tmp1); -} - -std::string ARBDecompiler::HFma2(Operation operation) { - const std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - const std::string tmp3 = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); - AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); - AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2])); - AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, tmp3); - AddLine("PK2H.F {}.x, {};", tmp1, tmp1); - return fmt::format("{}.x", tmp1); -} - -std::string ARBDecompiler::HAbsolute(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - AddLine("PK2H.F {}.x, |{}|;", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HNegate(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - AddLine("MOVC.S RC.x, {};", Visit(operation[1])); - AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary); - AddLine("MOVC.S RC.x, {};", Visit(operation[2])); - AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HClamp(Operation operation) { - const std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); - 
AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1])); - AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); - AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2); - AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2])); - AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); - AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2); - AddLine("PK2H.F {}.x, {};", tmp1, tmp1); - return fmt::format("{}.x", tmp1); -} - -std::string ARBDecompiler::HCastFloat(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary); - AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0])); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HUnpack(Operation operation) { - std::string operand = Visit(operation[0]); - switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) { - case Tegra::Shader::HalfType::H0_H1: - return operand; - case Tegra::Shader::HalfType::F32: { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.U {}.x, {};", temporary, operand); - AddLine("MOV.U {}.y, {}.x;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); - } - case Tegra::Shader::HalfType::H0_H0: { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, operand); - AddLine("MOV.U {}.y, {}.x;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); - } - case Tegra::Shader::HalfType::H1_H1: { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, operand); - AddLine("MOV.U {}.x, {}.y;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); - } - } - UNREACHABLE(); - return "{0, 0, 0, 0}.x"; -} - -std::string ARBDecompiler::HMergeF32(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HMergeH0(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); - AddLine("MOV.U {}.x, {}.z;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HMergeH1(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); - AddLine("MOV.U {}.y, {}.w;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HPack2(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0])); - AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1])); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::LogicalAssign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - std::string target; - - if (const auto pred = std::get_if<PredicateNode>(&*dest)) { - ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); - - const 
Tegra::Shader::Pred index = pred->GetIndex(); - switch (index) { - case Tegra::Shader::Pred::NeverExecute: - case Tegra::Shader::Pred::UnusedIndex: - // Writing to these predicates is a no-op - return {}; - } - target = fmt::format("P{}.x", static_cast<u64>(index)); - } else if (const auto internal_flag = std::get_if<InternalFlagNode>(&*dest)) { - const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag()); - target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); - } else { - UNREACHABLE(); - ResetTemporaries(); - return {}; - } - - AddLine("MOV.U {}, {};", target, Visit(src)); - ResetTemporaries(); - return {}; -} - -std::string ARBDecompiler::LogicalPick2(Operation operation) { - std::string temporary = AllocTemporary(); - const u32 index = std::get<ImmediateNode>(*operation[1]).GetValue(); - AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index)); - return temporary; -} - -std::string ARBDecompiler::LogicalAnd2(Operation operation) { - std::string temporary = AllocTemporary(); - const std::string op = Visit(operation[0]); - AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op); - return temporary; -} - -std::string ARBDecompiler::FloatOrdered(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); - AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); - AddLine("MOV.S {}, -1;", temporary); - AddLine("MOV.S {} (NAN.x), 0;", temporary); - AddLine("MOV.S {} (NAN.y), 0;", temporary); - return temporary; -} - -std::string ARBDecompiler::FloatUnordered(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); - AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); - AddLine("MOV.S {}, 0;", temporary); - AddLine("MOV.S {} (NAN.x), -1;", temporary); - AddLine("MOV.S {} (NAN.y), -1;", temporary); - return temporary; -} - -std::string ARBDecompiler::LogicalAddCarry(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1])); - AddLine("MOV.S {}, 0;", temporary); - AddLine("IF CF.x;"); - AddLine("MOV.S {}, -1;", temporary); - AddLine("ENDIF;"); - return temporary; -} - -std::string ARBDecompiler::Texture(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - const auto [coords, temporary, swizzle] = BuildCoords(operation); - - std::string_view opcode = "TEX"; - std::string extra; - if (meta.bias) { - ASSERT(!meta.lod); - opcode = "TXB"; - - if (swizzle < 4) { - AddLine("MOV.F {}.w, {};", temporary, Visit(meta.bias)); - } else { - const std::string bias = AllocTemporary(); - AddLine("MOV.F {}, {};", bias, Visit(meta.bias)); - extra = fmt::format(" {},", bias); - } - } - if (meta.lod) { - ASSERT(!meta.bias); - opcode = "TXL"; - - if (swizzle < 4) { - AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); - } else { - const std::string lod = AllocTemporary(); - AddLine("MOV.F {}, {};", lod, Visit(meta.lod)); - extra = fmt::format(" {},", lod); - } - } - - AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, coords, extra, sampler_id, - TextureType(meta), BuildAoffi(operation)); - AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TextureGather(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - 
const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - const auto [coords, temporary, swizzle] = BuildCoords(operation); - - std::string comp; - if (!meta.sampler.is_shadow) { - const auto& immediate = std::get<ImmediateNode>(*meta.component); - comp = fmt::format(".{}", Swizzle(immediate.GetValue())); - } - - AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp, - TextureType(meta), BuildAoffi(operation)); - AddLine("MOV.U {}.x, {}.{};", temporary, coords, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TextureQueryDimensions(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - const std::string temporary = AllocVectorTemporary(); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - - ASSERT(!meta.sampler.is_array); - - const std::string lod = operation.GetOperandsCount() > 0 ? Visit(operation[0]) : "0"; - AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta)); - AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TextureQueryLod(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - const std::string temporary = AllocVectorTemporary(); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - - ASSERT(!meta.sampler.is_array); - - const std::size_t count = operation.GetOperandsCount(); - for (std::size_t i = 0; i < count; ++i) { - AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); - } - AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta)); - AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary); - AddLine("TRUNC.S {}, {};", temporary, temporary); - AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TexelFetch(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - const auto [coords, temporary, swizzle] = BuildCoords(operation); - - if (!meta.sampler.is_buffer) { - ASSERT(swizzle < 4); - AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); - } - AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, coords, sampler_id, TextureType(meta), - BuildAoffi(operation)); - AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TextureGradient(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - const std::string ddx = AllocVectorTemporary(); - const std::string ddy = AllocVectorTemporary(); - const std::string coord = std::get<1>(BuildCoords(operation)); - - const std::size_t num_components = meta.derivates.size() / 2; - for (std::size_t index = 0; index < num_components; ++index) { - const char swizzle = Swizzle(index); - AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2])); - AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1])); - } - - const std::string_view result = coord; - AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id, - 
TextureType(meta), BuildAoffi(operation)); - AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element)); - return fmt::format("{}.x", result); -} - -std::string ARBDecompiler::ImageLoad(Operation operation) { - const auto& meta = std::get<MetaImage>(operation.GetMeta()); - const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; - const std::size_t count = operation.GetOperandsCount(); - const std::string_view type = ImageType(meta.image.type); - - const std::string temporary = AllocVectorTemporary(); - for (std::size_t i = 0; i < count; ++i) { - AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); - } - AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type); - AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::ImageStore(Operation operation) { - const auto& meta = std::get<MetaImage>(operation.GetMeta()); - const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; - const std::size_t num_coords = operation.GetOperandsCount(); - const std::size_t num_values = meta.values.size(); - const std::string_view type = ImageType(meta.image.type); - - const std::string coord = AllocVectorTemporary(); - const std::string value = AllocVectorTemporary(); - for (std::size_t i = 0; i < num_coords; ++i) { - AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i])); - } - for (std::size_t i = 0; i < num_values; ++i) { - AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); - } - AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type); - return {}; -} - -std::string ARBDecompiler::Branch(Operation operation) { - const auto target = std::get<ImmediateNode>(*operation[0]); - AddLine("MOV.U PC.x, {};", target.GetValue()); - AddLine("CONT;"); - return {}; -} - -std::string ARBDecompiler::BranchIndirect(Operation operation) { - AddLine("MOV.U PC.x, {};", Visit(operation[0])); - AddLine("CONT;"); - return {}; -} - -std::string ARBDecompiler::PushFlowStack(Operation operation) { - const auto stack = std::get<MetaStackClass>(operation.GetMeta()); - const u32 target = std::get<ImmediateNode>(*operation[0]).GetValue(); - const std::string_view stack_name = StackName(stack); - AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target); - AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); - return {}; -} - -std::string ARBDecompiler::PopFlowStack(Operation operation) { - const auto stack = std::get<MetaStackClass>(operation.GetMeta()); - const std::string_view stack_name = StackName(stack); - AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); - AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name); - AddLine("CONT;"); - return {}; -} - -std::string ARBDecompiler::Exit(Operation) { - Exit(); - return {}; -} - -std::string ARBDecompiler::Discard(Operation) { - AddLine("KIL TR;"); - return {}; -} - -std::string ARBDecompiler::EmitVertex(Operation) { - AddLine("EMIT;"); - return {}; -} - -std::string ARBDecompiler::EndPrimitive(Operation) { - AddLine("ENDPRIM;"); - return {}; -} - -std::string ARBDecompiler::InvocationId(Operation) { - return "primitive.invocation"; -} - -std::string ARBDecompiler::YNegate(Operation) { - LOG_WARNING(Render_OpenGL, "(STUBBED)"); - std::string temporary = AllocTemporary(); - AddLine("MOV.F {}, 1;", temporary); - return temporary; -} - -std::string ARBDecompiler::ThreadId(Operation) { - return 
fmt::format("{}.threadid", StageInputName(stage)); -} - -std::string ARBDecompiler::ShuffleIndexed(Operation operation) { - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, - "NV_shader_thread_shuffle is missing. Kepler or better is required."); - return Visit(operation[0]); - } - const std::string temporary = AllocVectorTemporary(); - AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]), - Visit(operation[1])); - AddLine("MOV.U {}.x, {}.y;", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::Barrier(Operation) { - AddLine("BAR;"); - return {}; -} - -std::string ARBDecompiler::MemoryBarrierGroup(Operation) { - AddLine("MEMBAR.CTA;"); - return {}; -} - -std::string ARBDecompiler::MemoryBarrierGlobal(Operation) { - AddLine("MEMBAR;"); - return {}; -} - -} // Anonymous namespace - -std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - const VideoCommon::Shader::Registry& registry, - Tegra::Engines::ShaderType stage, std::string_view identifier) { - return ARBDecompiler(device, ir, registry, stage, identifier).Code(); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.h b/src/video_core/renderer_opengl/gl_arb_decompiler.h deleted file mode 100644 index 6afc87220..000000000 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.h +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <string> -#include <string_view> - -#include "common/common_types.h" - -namespace Tegra::Engines { -enum class ShaderType : u32; -} - -namespace VideoCommon::Shader { -class ShaderIR; -class Registry; -} // namespace VideoCommon::Shader - -namespace OpenGL { - -class Device; - -std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - const VideoCommon::Shader::Registry& registry, - Tegra::Engines::ShaderType stage, std::string_view identifier); - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index a02a45e04..07a995f7d 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -2,14 +2,18 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include <algorithm> #include <span> #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/maxwell_to_gl.h" namespace OpenGL { namespace { +using VideoCore::Surface::PixelFormat; + struct BindlessSSBO { GLuint64EXT address; GLsizei length; @@ -21,6 +25,25 @@ constexpr std::array PROGRAM_LUT{ GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, }; + +[[nodiscard]] GLenum GetTextureBufferFormat(GLenum gl_format) { + switch (gl_format) { + case GL_RGBA8_SNORM: + return GL_RGBA8; + case GL_R8_SNORM: + return GL_R8; + case GL_RGBA16_SNORM: + return GL_RGBA16; + case GL_R16_SNORM: + return GL_R16; + case GL_RG16_SNORM: + return GL_RG16; + case GL_RG8_SNORM: + return GL_RG8; + default: + return gl_format; + } +} } // Anonymous namespace Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) @@ -62,6 +85,30 @@ void Buffer::MakeResident(GLenum access) noexcept { glMakeNamedBufferResidentNV(buffer.handle, access); } +GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) { + const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) { + return offset == view.offset && size == view.size && format == view.format; + })}; + if (it != views.end()) { + return it->texture.handle; + } + OGLTexture texture; + texture.Create(GL_TEXTURE_BUFFER); + const GLenum gl_format{MaxwellToGL::GetFormatTuple(format).internal_format}; + const GLenum texture_format{GetTextureBufferFormat(gl_format)}; + if (texture_format != gl_format) { + LOG_WARNING(Render_OpenGL, "Emulating SNORM texture buffer with UNORM."); + } + glTextureBufferRange(texture.handle, texture_format, buffer.handle, offset, size); + views.push_back({ + .offset = offset, + .size = size, + .format = format, + .texture = std::move(texture), + }); + return views.back().texture.handle; +} + BufferCacheRuntime::BufferCacheRuntime(const Device& device_) : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()}, use_assembly_shaders{device.UseAssemblyShaders()}, @@ -144,7 +191,7 @@ void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buff glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, static_cast<GLsizeiptr>(size)); } else { - const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; + const GLuint base_binding = graphics_base_uniform_bindings[stage]; const GLuint binding = base_binding + binding_index; glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); @@ -171,7 +218,12 @@ void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buf void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size, bool is_written) { - if (use_assembly_shaders) { + if (use_storage_buffers) { + const GLuint base_binding = graphics_base_storage_bindings[stage]; + const GLuint binding = base_binding + binding_index; + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(), + static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); + } else { const BindlessSSBO ssbo{ .address = buffer.HostGpuAddr() + offset, .length = static_cast<GLsizei>(size), @@ -180,17 +232,19 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff buffer.MakeResident(is_written ? 
GL_READ_WRITE : GL_READ_ONLY); glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1, reinterpret_cast<const GLuint*>(&ssbo)); - } else { - const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer; - const GLuint binding = base_binding + binding_index; - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(), - static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); } } void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size, bool is_written) { - if (use_assembly_shaders) { + if (use_storage_buffers) { + if (size != 0) { + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(), + static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); + } else { + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0); + } + } else { const BindlessSSBO ssbo{ .address = buffer.HostGpuAddr() + offset, .length = static_cast<GLsizei>(size), @@ -199,11 +253,6 @@ void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buf buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1, reinterpret_cast<const GLuint*>(&ssbo)); - } else if (size == 0) { - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0); - } else { - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(), - static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); } } @@ -213,4 +262,13 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); } +void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, + PixelFormat format) { + *texture_handles++ = buffer.View(offset, size, format); +} + +void BufferCacheRuntime::BindImageBuffer(Buffer& buffer, u32 offset, u32 size, PixelFormat format) { + *image_handles++ = buffer.View(offset, size, format); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index fe91aa452..060d36427 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -32,6 +32,8 @@ public: void MakeResident(GLenum access) noexcept; + [[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); + [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { return address; } @@ -41,9 +43,17 @@ public: } private: + struct BufferView { + u32 offset; + u32 size; + VideoCore::Surface::PixelFormat format; + OGLTexture texture; + }; + GLuint64EXT address = 0; OGLBuffer buffer; GLenum current_residency_access = GL_NONE; + std::vector<BufferView> views; }; class BufferCacheRuntime { @@ -75,17 +85,21 @@ public: void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size); + void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, + VideoCore::Surface::PixelFormat format); + + void BindImageBuffer(Buffer& buffer, u32 offset, u32 size, + VideoCore::Surface::PixelFormat format); + void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) { + const GLuint handle = fast_uniforms[stage][binding_index].handle; + const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size); if (use_assembly_shaders) { - const GLuint handle = fast_uniforms[stage][binding_index].handle; - const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size); glBindBufferRangeNV(PABO_LUT[stage], 
binding_index, handle, 0, gl_size); } else { - const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; + const GLuint base_binding = graphics_base_uniform_bindings[stage]; const GLuint binding = base_binding + binding_index; - glBindBufferRange(GL_UNIFORM_BUFFER, binding, - fast_uniforms[stage][binding_index].handle, 0, - static_cast<GLsizeiptr>(size)); + glBindBufferRange(GL_UNIFORM_BUFFER, binding, handle, 0, gl_size); } } @@ -103,7 +117,7 @@ public: std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept { const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size)); - const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; + const GLuint base_binding = graphics_base_uniform_bindings[stage]; const GLuint binding = base_binding + binding_index; glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); @@ -118,6 +132,27 @@ public: return has_fast_buffer_sub_data; } + [[nodiscard]] bool SupportsNonZeroUniformOffset() const noexcept { + return !use_assembly_shaders; + } + + void SetBaseUniformBindings(const std::array<GLuint, 5>& bindings) { + graphics_base_uniform_bindings = bindings; + } + + void SetBaseStorageBindings(const std::array<GLuint, 5>& bindings) { + graphics_base_storage_bindings = bindings; + } + + void SetImagePointers(GLuint* texture_handles_, GLuint* image_handles_) { + texture_handles = texture_handles_; + image_handles = image_handles_; + } + + void SetEnableStorageBuffers(bool use_storage_buffers_) { + use_storage_buffers = use_storage_buffers_; + } + private: static constexpr std::array PABO_LUT{ GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, @@ -131,8 +166,15 @@ private: bool use_assembly_shaders = false; bool has_unified_vertex_buffers = false; + bool use_storage_buffers = false; + u32 max_attributes = 0; + std::array<GLuint, 5> graphics_base_uniform_bindings{}; + std::array<GLuint, 5> graphics_base_storage_bindings{}; + GLuint* texture_handles = nullptr; + GLuint* image_handles = nullptr; + std::optional<StreamBuffer> stream_buffer; std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>, @@ -156,6 +198,7 @@ struct BufferCacheParams { static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true; static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; static constexpr bool USE_MEMORY_MAPS = false; + static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true; }; using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp new file mode 100644 index 000000000..aa1cc592f --- /dev/null +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -0,0 +1,209 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
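The Buffer::View method added to gl_buffer_cache above is a small lookup-or-create cache: a linear search over the views already built for this buffer, creating a GL_TEXTURE_BUFFER view only on a miss. Reduced to a self-contained sketch (the types and names here are illustrative stand-ins, not the actual classes):

// Sketch only: the lookup-or-create pattern behind Buffer::View.
#include <algorithm>
#include <cstdint>
#include <vector>

struct ViewKey {
    std::uint32_t offset;
    std::uint32_t size;
    int format;
    bool operator==(const ViewKey&) const = default;
};

struct CachedView {
    ViewKey key;
    unsigned handle; // stands in for the OGLTexture buffer view
};

class ViewCache {
public:
    unsigned GetOrCreate(ViewKey key) {
        const auto it = std::find_if(views.begin(), views.end(),
                                     [&](const CachedView& v) { return v.key == key; });
        if (it != views.end()) {
            return it->handle; // reuse the existing view, no new object created
        }
        views.push_back({key, next_handle++}); // create and remember on a miss
        return views.back().handle;
    }

private:
    std::vector<CachedView> views; // expected to stay small, so a linear scan is cheap
    unsigned next_handle = 1;
};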
+ +#include <cstring> + +#include "common/cityhash.h" +#include "common/settings.h" // for enum class Settings::ShaderBackend +#include "video_core/renderer_opengl/gl_compute_pipeline.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" + +namespace OpenGL { + +using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; +using VideoCommon::ImageId; + +constexpr u32 MAX_TEXTURES = 64; +constexpr u32 MAX_IMAGES = 16; + +template <typename Range> +u32 AccumulateCount(const Range& range) { + u32 num{}; + for (const auto& desc : range) { + num += desc.count; + } + return num; +} + +size_t ComputePipelineKey::Hash() const noexcept { + return static_cast<size_t>( + Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this)); +} + +bool ComputePipelineKey::operator==(const ComputePipelineKey& rhs) const noexcept { + return std::memcmp(this, &rhs, sizeof *this) == 0; +} + +ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cache_, + BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::KeplerCompute& kepler_compute_, + ProgramManager& program_manager_, const Shader::Info& info_, + std::string code, std::vector<u32> code_v) + : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, + kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_} { + switch (device.GetShaderBackend()) { + case Settings::ShaderBackend::GLSL: + source_program = CreateProgram(code, GL_COMPUTE_SHADER); + break; + case Settings::ShaderBackend::GLASM: + assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); + break; + case Settings::ShaderBackend::SPIRV: + source_program = CreateProgram(code_v, GL_COMPUTE_SHADER); + break; + } + std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), + uniform_buffer_sizes.begin()); + + num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors); + num_image_buffers = AccumulateCount(info.image_buffer_descriptors); + + const u32 num_textures{num_texture_buffers + AccumulateCount(info.texture_descriptors)}; + ASSERT(num_textures <= MAX_TEXTURES); + + const u32 num_images{num_image_buffers + AccumulateCount(info.image_descriptors)}; + ASSERT(num_images <= MAX_IMAGES); + + const bool is_glasm{assembly_program.handle != 0}; + const u32 num_storage_buffers{AccumulateCount(info.storage_buffers_descriptors)}; + use_storage_buffers = + !is_glasm || num_storage_buffers < device.GetMaxGLASMStorageBufferBlocks(); + writes_global_memory = !use_storage_buffers && + std::ranges::any_of(info.storage_buffers_descriptors, + [](const auto& desc) { return desc.is_written; }); +} + +void ComputePipeline::Configure() { + buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes); + buffer_cache.UnbindComputeStorageBuffers(); + size_t ssbo_index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, + desc.is_written); + ++ssbo_index; + } + texture_cache.SynchronizeComputeDescriptors(); + + std::array<ImageViewId, MAX_TEXTURES + MAX_IMAGES> image_view_ids; + boost::container::static_vector<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices; + std::array<GLuint, MAX_TEXTURES> samplers; + std::array<GLuint, MAX_TEXTURES> textures; + std::array<GLuint, MAX_IMAGES> images; + GLsizei sampler_binding{}; + GLsizei texture_binding{}; 
+ GLsizei image_binding{}; + + const auto& qmd{kepler_compute.launch_description}; + const auto& cbufs{qmd.const_buffer_config}; + const bool via_header_index{qmd.linked_tsc != 0}; + const auto read_handle{[&](const auto& desc, u32 index) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset}; + if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> || + std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) { + if (desc.has_secondary) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0); + const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset}; + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() + + secondary_offset}; + const u32 lhs_raw{gpu_memory.Read<u32>(addr)}; + const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)}; + return TexturePair(lhs_raw | rhs_raw, via_header_index); + } + } + return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); + }}; + const auto add_image{[&](const auto& desc) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + } + }}; + for (const auto& desc : info.texture_buffer_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + samplers[sampler_binding++] = 0; + } + } + std::ranges::for_each(info.image_buffer_descriptors, add_image); + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + + Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); + samplers[sampler_binding++] = sampler->Handle(); + } + } + std::ranges::for_each(info.image_descriptors, add_image); + + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillComputeImageViews(indices_span, image_view_ids); + + if (assembly_program.handle != 0) { + program_manager.BindComputeAssemblyProgram(assembly_program.handle); + } else { + program_manager.BindComputeProgram(source_program.handle); + } + buffer_cache.UnbindComputeTextureBuffers(); + size_t texbuf_index{}; + const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>; + for (u32 i = 0; i < desc.count; ++i) { + bool is_written{false}; + if constexpr (is_image) { + is_written = desc.is_written; + } + ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])}; + buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written, is_image); + ++texbuf_index; + } + }}; + std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); + std::ranges::for_each(info.image_buffer_descriptors, add_buffer); + + buffer_cache.UpdateComputeBuffers(); + + buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers); + buffer_cache.runtime.SetImagePointers(textures.data(), images.data()); + buffer_cache.BindHostComputeBuffers(); + + const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers}; + texture_binding += num_texture_buffers; + image_binding += 
num_image_buffers; + + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + textures[texture_binding++] = image_view.Handle(desc.type); + } + } + for (const auto& desc : info.image_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + if (desc.is_written) { + texture_cache.MarkModification(image_view.image_id); + } + images[image_binding++] = image_view.StorageView(desc.type, desc.format); + } + } + if (texture_binding != 0) { + ASSERT(texture_binding == sampler_binding); + glBindTextures(0, texture_binding, textures.data()); + glBindSamplers(0, sampler_binding, samplers.data()); + } + if (image_binding != 0) { + glBindImageTextures(0, image_binding, images.data()); + } +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h new file mode 100644 index 000000000..50c676365 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h @@ -0,0 +1,93 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <type_traits> +#include <utility> + +#include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_opengl/gl_buffer_cache.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" + +namespace Tegra { +class MemoryManager; +} + +namespace Tegra::Engines { +class KeplerCompute; +} + +namespace Shader { +struct Info; +} + +namespace OpenGL { + +class Device; +class ProgramManager; + +struct ComputePipelineKey { + u64 unique_hash; + u32 shared_memory_size; + std::array<u32, 3> workgroup_size; + + size_t Hash() const noexcept; + + bool operator==(const ComputePipelineKey&) const noexcept; + + bool operator!=(const ComputePipelineKey& rhs) const noexcept { + return !operator==(rhs); + } +}; +static_assert(std::has_unique_object_representations_v<ComputePipelineKey>); +static_assert(std::is_trivially_copyable_v<ComputePipelineKey>); +static_assert(std::is_trivially_constructible_v<ComputePipelineKey>); + +class ComputePipeline { +public: + explicit ComputePipeline(const Device& device, TextureCache& texture_cache_, + BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::KeplerCompute& kepler_compute_, + ProgramManager& program_manager_, const Shader::Info& info_, + std::string code, std::vector<u32> code_v); + + void Configure(); + + [[nodiscard]] bool WritesGlobalMemory() const noexcept { + return writes_global_memory; + } + +private: + TextureCache& texture_cache; + BufferCache& buffer_cache; + Tegra::MemoryManager& gpu_memory; + Tegra::Engines::KeplerCompute& kepler_compute; + ProgramManager& program_manager; + + Shader::Info info; + OGLProgram source_program; + OGLAssemblyProgram assembly_program; + VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{}; + + u32 num_texture_buffers{}; + u32 num_image_buffers{}; + + bool use_storage_buffers{}; + bool writes_global_memory{}; +}; + +} // namespace OpenGL + +namespace std { +template <> +struct hash<OpenGL::ComputePipelineKey> { + size_t operator()(const OpenGL::ComputePipelineKey& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std diff --git 
a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 3b00614e7..9692b8e94 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -17,39 +17,17 @@ #include "common/logging/log.h" #include "common/scope_exit.h" #include "common/settings.h" +#include "shader_recompiler/stage.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" namespace OpenGL { namespace { -// One uniform block is reserved for emulation purposes -constexpr u32 ReservedUniformBlocks = 1; - -constexpr u32 NumStages = 5; - constexpr std::array LIMIT_UBOS = { GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS, GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS, }; -constexpr std::array LIMIT_SSBOS = { - GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, - GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, - GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS, -}; -constexpr std::array LIMIT_SAMPLERS = { - GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, - GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, - GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, - GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, - GL_MAX_TEXTURE_IMAGE_UNITS, - GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS, -}; -constexpr std::array LIMIT_IMAGES = { - GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, - GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS, - GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS, -}; template <typename T> T GetInteger(GLenum pname) { @@ -82,81 +60,18 @@ bool HasExtension(std::span<const std::string_view> extensions, std::string_view return std::ranges::find(extensions, extension) != extensions.end(); } -u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) { - ASSERT(num >= amount); - if (limit) { - amount = std::min(amount, GetInteger<u32>(*limit)); - } - num -= amount; - return std::exchange(base, base + amount); -} - -std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept { - std::array<u32, Tegra::Engines::MaxShaderTypes> max; - std::ranges::transform(LIMIT_UBOS, max.begin(), - [](GLenum pname) { return GetInteger<u32>(pname); }); +std::array<u32, Shader::MaxStageTypes> BuildMaxUniformBuffers() noexcept { + std::array<u32, Shader::MaxStageTypes> max; + std::ranges::transform(LIMIT_UBOS, max.begin(), &GetInteger<u32>); return max; } -std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept { - std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings; - - static constexpr std::array<std::size_t, 5> stage_swizzle{0, 1, 2, 3, 4}; - const u32 total_ubos = GetInteger<u32>(GL_MAX_UNIFORM_BUFFER_BINDINGS); - const u32 total_ssbos = GetInteger<u32>(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS); - const u32 total_samplers = GetInteger<u32>(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS); - - u32 num_ubos = total_ubos - ReservedUniformBlocks; - u32 num_ssbos = total_ssbos; - u32 num_samplers = total_samplers; - - u32 base_ubo = ReservedUniformBlocks; - u32 base_ssbo = 0; - u32 base_samplers = 0; - - for (std::size_t i = 0; i < NumStages; ++i) { - const std::size_t stage = stage_swizzle[i]; - bindings[stage] = { - Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]), - Extract(base_ssbo, 
num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]), - Extract(base_samplers, num_samplers, total_samplers / NumStages, - LIMIT_SAMPLERS[stage])}; - } - - u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS); - u32 base_images = 0; - - // GL_MAX_IMAGE_UNITS is guaranteed by the spec to have a minimum value of 8. - // Due to the limitation of GL_MAX_IMAGE_UNITS, reserve at least 4 image bindings on the - // fragment stage, and at least 1 for the rest of the stages. - // So far games are observed to use 1 image binding on vertex and 4 on fragment stages. - - // Reserve at least 4 image bindings on the fragment stage. - bindings[4].image = - Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]); - - // This is guaranteed to be at least 1. - const u32 total_extracted_images = num_images / (NumStages - 1); - - // Reserve the other image bindings. - for (std::size_t i = 0; i < NumStages; ++i) { - const std::size_t stage = stage_swizzle[i]; - if (stage == 4) { - continue; - } - bindings[stage].image = - Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]); - } - - // Compute doesn't care about any of this. - bindings[5] = {0, 0, 0, 0}; - - return bindings; -} - bool IsASTCSupported() { - static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY}; - static constexpr std::array formats = { + static constexpr std::array targets{ + GL_TEXTURE_2D, + GL_TEXTURE_2D_ARRAY, + }; + static constexpr std::array formats{ GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x5_KHR, @@ -172,11 +87,10 @@ bool IsASTCSupported() { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR, }; - static constexpr std::array required_support = { + static constexpr std::array required_support{ GL_VERTEX_TEXTURE, GL_TESS_CONTROL_TEXTURE, GL_TESS_EVALUATION_TEXTURE, GL_GEOMETRY_TEXTURE, GL_FRAGMENT_TEXTURE, GL_COMPUTE_TEXTURE, }; - for (const GLenum target : targets) { for (const GLenum format : formats) { for (const GLenum support : required_support) { @@ -223,14 +137,13 @@ Device::Device() { "Beta driver 443.24 is known to have issues. 
There might be performance issues."); disable_fast_buffer_sub_data = true; } - max_uniform_buffers = BuildMaxUniformBuffers(); - base_bindings = BuildBaseBindings(); uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE); + max_glasm_storage_buffer_blocks = GetInteger<u32>(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS); has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && GLAD_GL_NV_shader_thread_shuffle; has_shader_ballot = GLAD_GL_ARB_shader_ballot; @@ -243,18 +156,30 @@ Device::Device() { has_precise_bug = TestPreciseBug(); has_broken_texture_view_formats = is_amd || (!is_linux && is_intel); has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; + has_derivative_control = GLAD_GL_ARB_derivative_control; has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; has_debugging_tool_attached = IsDebugToolAttached(extensions); has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); + has_geometry_shader_passthrough = GLAD_GL_NV_geometry_shader_passthrough; + has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5; + has_shader_int64 = HasExtension(extensions, "GL_ARB_gpu_shader_int64"); + has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; + has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2; + warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel; + need_fastmath_off = is_nvidia; // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive // uniform buffers as "push constants" has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; - use_assembly_shaders = Settings::values.use_assembly_shaders.GetValue() && + shader_backend = Settings::values.shader_backend.GetValue(); + use_assembly_shaders = shader_backend == Settings::ShaderBackend::GLASM && GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; - + if (shader_backend == Settings::ShaderBackend::GLASM && !use_assembly_shaders) { + LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); + shader_backend = Settings::ShaderBackend::GLSL; + } // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. 
use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && !(is_amd || (is_intel && !is_linux)); @@ -265,11 +190,6 @@ Device::Device() { LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}", has_broken_texture_view_formats); - - if (Settings::values.use_assembly_shaders.GetValue() && !use_assembly_shaders) { - LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); - } - if (Settings::values.use_asynchronous_shaders.GetValue() && !use_asynchronous_shaders) { LOG_WARNING(Render_OpenGL, "Asynchronous shader compilation enabled but not supported"); } @@ -325,22 +245,6 @@ std::string Device::GetVendorName() const { return vendor_name; } -Device::Device(std::nullptr_t) { - max_uniform_buffers.fill(std::numeric_limits<u32>::max()); - uniform_buffer_alignment = 4; - shader_storage_alignment = 4; - max_vertex_attributes = 16; - max_varyings = 15; - max_compute_shared_memory_size = 0x10000; - has_warp_intrinsics = true; - has_shader_ballot = true; - has_vertex_viewport_layer = true; - has_image_load_formatted = true; - has_texture_shadow_lod = true; - has_variable_aoffi = true; - has_depth_buffer_float = true; -} - bool Device::TestVariableAoffi() { return TestProgram(R"(#version 430 core // This is a unit test, please ignore me on apitrace bug reports. diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 2c2b13767..ee992aed4 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -6,34 +6,22 @@ #include <cstddef> #include "common/common_types.h" -#include "video_core/engines/shader_type.h" +#include "shader_recompiler/stage.h" + +namespace Settings { +enum class ShaderBackend : u32; +}; namespace OpenGL { class Device { public: - struct BaseBindings { - u32 uniform_buffer{}; - u32 shader_storage_buffer{}; - u32 sampler{}; - u32 image{}; - }; - explicit Device(); - explicit Device(std::nullptr_t); [[nodiscard]] std::string GetVendorName() const; - u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept { - return max_uniform_buffers[static_cast<std::size_t>(shader_type)]; - } - - const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept { - return base_bindings[stage_index]; - } - - const BaseBindings& GetBaseBindings(Tegra::Engines::ShaderType shader_type) const noexcept { - return GetBaseBindings(static_cast<std::size_t>(shader_type)); + u32 GetMaxUniformBuffers(Shader::Stage stage) const noexcept { + return max_uniform_buffers[static_cast<size_t>(stage)]; } size_t GetUniformBufferAlignment() const { @@ -56,6 +44,10 @@ public: return max_compute_shared_memory_size; } + u32 GetMaxGLASMStorageBufferBlocks() const { + return max_glasm_storage_buffer_blocks; + } + bool HasWarpIntrinsics() const { return has_warp_intrinsics; } @@ -108,6 +100,10 @@ public: return has_nv_viewport_array2; } + bool HasDerivativeControl() const { + return has_derivative_control; + } + bool HasDebuggingToolAttached() const { return has_debugging_tool_attached; } @@ -128,18 +124,52 @@ public: return has_depth_buffer_float; } + bool HasGeometryShaderPassthrough() const { + return has_geometry_shader_passthrough; + } + + bool HasNvGpuShader5() const { + return has_nv_gpu_shader_5; + } + + bool HasShaderInt64() const { + return has_shader_int64; + } + + bool HasAmdShaderHalfFloat() const { + return has_amd_shader_half_float; + } + + bool HasSparseTexture2() const { 
+ return has_sparse_texture_2; + } + + bool IsWarpSizePotentiallyLargerThanGuest() const { + return warp_size_potentially_larger_than_guest; + } + + bool NeedsFastmathOff() const { + return need_fastmath_off; + } + + Settings::ShaderBackend GetShaderBackend() const { + return shader_backend; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); - std::string vendor_name; - std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{}; - std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{}; + std::array<u32, Shader::MaxStageTypes> max_uniform_buffers{}; size_t uniform_buffer_alignment{}; size_t shader_storage_alignment{}; u32 max_vertex_attributes{}; u32 max_varyings{}; u32 max_compute_shared_memory_size{}; + u32 max_glasm_storage_buffer_blocks{}; + + Settings::ShaderBackend shader_backend{}; + bool has_warp_intrinsics{}; bool has_shader_ballot{}; bool has_vertex_viewport_layer{}; @@ -153,11 +183,21 @@ private: bool has_broken_texture_view_formats{}; bool has_fast_buffer_sub_data{}; bool has_nv_viewport_array2{}; + bool has_derivative_control{}; bool has_debugging_tool_attached{}; bool use_assembly_shaders{}; bool use_asynchronous_shaders{}; bool use_driver_cache{}; bool has_depth_buffer_float{}; + bool has_geometry_shader_passthrough{}; + bool has_nv_gpu_shader_5{}; + bool has_shader_int64{}; + bool has_amd_shader_half_float{}; + bool has_sparse_texture_2{}; + bool warp_size_potentially_larger_than_guest{}; + bool need_fastmath_off{}; + + std::string vendor_name; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp new file mode 100644 index 000000000..fac0034fb --- /dev/null +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -0,0 +1,572 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <algorithm> +#include <array> +#include <string> +#include <vector> + +#include "common/settings.h" // for enum class Settings::ShaderBackend +#include "common/thread_worker.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_opengl/gl_graphics_pipeline.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" +#include "video_core/renderer_opengl/gl_state_tracker.h" +#include "video_core/shader_notify.h" +#include "video_core/texture_cache/texture_cache.h" + +#if defined(_MSC_VER) && defined(NDEBUG) +#define LAMBDA_FORCEINLINE [[msvc::forceinline]] +#else +#define LAMBDA_FORCEINLINE +#endif + +namespace OpenGL { +namespace { +using Shader::ImageBufferDescriptor; +using Shader::ImageDescriptor; +using Shader::TextureBufferDescriptor; +using Shader::TextureDescriptor; +using Tegra::Texture::TexturePair; +using VideoCommon::ImageId; + +constexpr u32 MAX_TEXTURES = 64; +constexpr u32 MAX_IMAGES = 8; + +template <typename Range> +u32 AccumulateCount(const Range& range) { + u32 num{}; + for (const auto& desc : range) { + num += desc.count; + } + return num; +} + +GLenum Stage(size_t stage_index) { + switch (stage_index) { + case 0: + return GL_VERTEX_SHADER; + case 1: + return GL_TESS_CONTROL_SHADER; + case 2: + return GL_TESS_EVALUATION_SHADER; + case 3: + return GL_GEOMETRY_SHADER; + case 4: + return GL_FRAGMENT_SHADER; + } + UNREACHABLE_MSG("{}", stage_index); + return GL_NONE; +} + +GLenum AssemblyStage(size_t stage_index) { + switch (stage_index) { + case 0: + return GL_VERTEX_PROGRAM_NV; + case 1: + return GL_TESS_CONTROL_PROGRAM_NV; + case 2: + return GL_TESS_EVALUATION_PROGRAM_NV; + case 3: + return GL_GEOMETRY_PROGRAM_NV; + case 4: + return GL_FRAGMENT_PROGRAM_NV; + } + UNREACHABLE_MSG("{}", stage_index); + return GL_NONE; +} + +/// Translates hardware transform feedback indices +/// @param location Hardware location +/// @return Pair of ARB_transform_feedback3 token stream first and third arguments +/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt +std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) { + const u8 index = location / 4; + if (index >= 8 && index <= 39) { + return {GL_GENERIC_ATTRIB_NV, index - 8}; + } + if (index >= 48 && index <= 55) { + return {GL_TEXTURE_COORD_NV, index - 48}; + } + switch (index) { + case 7: + return {GL_POSITION, 0}; + case 40: + return {GL_PRIMARY_COLOR_NV, 0}; + case 41: + return {GL_SECONDARY_COLOR_NV, 0}; + case 42: + return {GL_BACK_PRIMARY_COLOR_NV, 0}; + case 43: + return {GL_BACK_SECONDARY_COLOR_NV, 0}; + } + UNIMPLEMENTED_MSG("index={}", index); + return {GL_POSITION, 0}; +} + +template <typename Spec> +bool Passes(const std::array<Shader::Info, 5>& stage_infos, u32 enabled_mask) { + for (size_t stage = 0; stage < stage_infos.size(); ++stage) { + if (!Spec::enabled_stages[stage] && ((enabled_mask >> stage) & 1) != 0) { + return false; + } + const auto& info{stage_infos[stage]}; + if constexpr (!Spec::has_storage_buffers) { + if (!info.storage_buffers_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_texture_buffers) { + if (!info.texture_buffer_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_image_buffers) { + if (!info.image_buffer_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_images) { + if (!info.image_descriptors.empty()) { + return false; + } + } + } + return true; +} + +using ConfigureFuncPtr = void 
(*)(GraphicsPipeline*, bool); + +template <typename Spec, typename... Specs> +ConfigureFuncPtr FindSpec(const std::array<Shader::Info, 5>& stage_infos, u32 enabled_mask) { + if constexpr (sizeof...(Specs) > 0) { + if (!Passes<Spec>(stage_infos, enabled_mask)) { + return FindSpec<Specs...>(stage_infos, enabled_mask); + } + } + return GraphicsPipeline::MakeConfigureSpecFunc<Spec>(); +} + +struct SimpleVertexFragmentSpec { + static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, true}; + static constexpr bool has_storage_buffers = false; + static constexpr bool has_texture_buffers = false; + static constexpr bool has_image_buffers = false; + static constexpr bool has_images = false; +}; + +struct SimpleVertexSpec { + static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, false}; + static constexpr bool has_storage_buffers = false; + static constexpr bool has_texture_buffers = false; + static constexpr bool has_image_buffers = false; + static constexpr bool has_images = false; +}; + +struct DefaultSpec { + static constexpr std::array<bool, 5> enabled_stages{true, true, true, true, true}; + static constexpr bool has_storage_buffers = true; + static constexpr bool has_texture_buffers = true; + static constexpr bool has_image_buffers = true; + static constexpr bool has_images = true; +}; + +ConfigureFuncPtr ConfigureFunc(const std::array<Shader::Info, 5>& infos, u32 enabled_mask) { + return FindSpec<SimpleVertexSpec, SimpleVertexFragmentSpec, DefaultSpec>(infos, enabled_mask); +} +} // Anonymous namespace + +GraphicsPipeline::GraphicsPipeline( + const Device& device, TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, + ProgramManager& program_manager_, StateTracker& state_tracker_, ShaderWorker* thread_worker, + VideoCore::ShaderNotify* shader_notify, std::array<std::string, 5> sources, + std::array<std::vector<u32>, 5> sources_spirv, const std::array<const Shader::Info*, 5>& infos, + const GraphicsPipelineKey& key_) + : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, + gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, + state_tracker{state_tracker_}, key{key_} { + if (shader_notify) { + shader_notify->MarkShaderBuilding(); + } + u32 num_textures{}; + u32 num_images{}; + u32 num_storage_buffers{}; + for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { + auto& info{stage_infos[stage]}; + if (infos[stage]) { + info = *infos[stage]; + enabled_stages_mask |= 1u << stage; + } + if (stage < 4) { + base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; + base_storage_bindings[stage + 1] = base_storage_bindings[stage]; + + base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); + base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); + } + enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; + std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); + + const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; + num_texture_buffers[stage] += num_tex_buffer_bindings; + num_textures += num_tex_buffer_bindings; + + const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; + num_image_buffers[stage] += num_img_buffers_bindings; + num_images += num_img_buffers_bindings; + + num_textures += AccumulateCount(info.texture_descriptors); + num_images += 
AccumulateCount(info.image_descriptors); + num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); + + writes_global_memory |= std::ranges::any_of( + info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); + } + ASSERT(num_textures <= MAX_TEXTURES); + ASSERT(num_images <= MAX_IMAGES); + + const bool assembly_shaders{assembly_programs[0].handle != 0}; + use_storage_buffers = + !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); + writes_global_memory &= !use_storage_buffers; + configure_func = ConfigureFunc(stage_infos, enabled_stages_mask); + + if (key.xfb_enabled && device.UseAssemblyShaders()) { + GenerateTransformFeedbackState(); + } + const bool in_parallel = thread_worker != nullptr; + const auto backend = device.GetShaderBackend(); + auto func{[this, sources = std::move(sources), sources_spirv = std::move(sources_spirv), + shader_notify, backend, in_parallel](ShaderContext::Context*) mutable { + for (size_t stage = 0; stage < 5; ++stage) { + switch (backend) { + case Settings::ShaderBackend::GLSL: + if (!sources[stage].empty()) { + source_programs[stage] = CreateProgram(sources[stage], Stage(stage)); + } + break; + case Settings::ShaderBackend::GLASM: + if (!sources[stage].empty()) { + assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage)); + if (in_parallel) { + // Make sure program is built before continuing when building in parallel + glGetString(GL_PROGRAM_ERROR_STRING_NV); + } + } + break; + case Settings::ShaderBackend::SPIRV: + if (!sources_spirv[stage].empty()) { + source_programs[stage] = CreateProgram(sources_spirv[stage], Stage(stage)); + } + break; + } + } + if (in_parallel && backend != Settings::ShaderBackend::GLASM) { + // Make sure programs have built if we are building shaders in parallel + for (OGLProgram& program : source_programs) { + if (program.handle != 0) { + GLint status{}; + glGetProgramiv(program.handle, GL_LINK_STATUS, &status); + } + } + } + if (shader_notify) { + shader_notify->MarkShaderComplete(); + } + is_built = true; + built_condvar.notify_one(); + }}; + if (thread_worker) { + thread_worker->QueueWork(std::move(func)); + } else { + func(nullptr); + } +} + +template <typename Spec> +void GraphicsPipeline::ConfigureImpl(bool is_indexed) { + std::array<ImageId, MAX_TEXTURES + MAX_IMAGES> image_view_ids; + std::array<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices; + std::array<GLuint, MAX_TEXTURES> samplers; + size_t image_view_index{}; + GLsizei sampler_binding{}; + + texture_cache.SynchronizeGraphicsDescriptors(); + + buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes); + buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); + buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); + buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers); + + const auto& regs{maxwell3d.regs}; + const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; + const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE { + const Shader::Info& info{stage_infos[stage]}; + buffer_cache.UnbindGraphicsStorageBuffers(stage); + if constexpr (Spec::has_storage_buffers) { + size_t ssbo_index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, + desc.cbuf_offset, desc.is_written); + ++ssbo_index; + } + } + const auto& 
cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; + const auto read_handle{[&](const auto& desc, u32 index) { + ASSERT(cbufs[desc.cbuf_index].enabled); + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset}; + if constexpr (std::is_same_v<decltype(desc), const TextureDescriptor&> || + std::is_same_v<decltype(desc), const TextureBufferDescriptor&>) { + if (desc.has_secondary) { + ASSERT(cbufs[desc.secondary_cbuf_index].enabled); + const u32 second_offset{desc.secondary_cbuf_offset + index_offset}; + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address + + second_offset}; + const u32 lhs_raw{gpu_memory.Read<u32>(addr)}; + const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)}; + const u32 raw{lhs_raw | rhs_raw}; + return TexturePair(raw, via_header_index); + } + } + return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); + }}; + const auto add_image{[&](const auto& desc) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_view_index++] = handle.first; + } + }}; + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_view_index++] = handle.first; + samplers[sampler_binding++] = 0; + } + } + } + if constexpr (Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_image(desc); + } + } + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_view_index++] = handle.first; + + Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; + samplers[sampler_binding++] = sampler->Handle(); + } + } + if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + add_image(desc); + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + config_stage(0); + } + if constexpr (Spec::enabled_stages[1]) { + config_stage(1); + } + if constexpr (Spec::enabled_stages[2]) { + config_stage(2); + } + if constexpr (Spec::enabled_stages[3]) { + config_stage(3); + } + if constexpr (Spec::enabled_stages[4]) { + config_stage(4); + } + const std::span indices_span(image_view_indices.data(), image_view_index); + texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + + texture_cache.UpdateRenderTargets(false); + state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); + + ImageId* texture_buffer_index{image_view_ids.data()}; + const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { + size_t index{}; + const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>; + for (u32 i = 0; i < desc.count; ++i) { + bool is_written{false}; + if constexpr (is_image) { + is_written = desc.is_written; + } + ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; + buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written, is_image); + ++index; + ++texture_buffer_index; + } + }}; + const Shader::Info& info{stage_infos[stage]}; + buffer_cache.UnbindGraphicsTextureBuffers(stage); + + if constexpr (Spec::has_texture_buffers) { + for (const auto& 
desc : info.texture_buffer_descriptors) { + add_buffer(desc); + } + } + if constexpr (Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_buffer(desc); + } + } + for (const auto& desc : info.texture_descriptors) { + texture_buffer_index += desc.count; + } + if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + texture_buffer_index += desc.count; + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + bind_stage_info(0); + } + if constexpr (Spec::enabled_stages[1]) { + bind_stage_info(1); + } + if constexpr (Spec::enabled_stages[2]) { + bind_stage_info(2); + } + if constexpr (Spec::enabled_stages[3]) { + bind_stage_info(3); + } + if constexpr (Spec::enabled_stages[4]) { + bind_stage_info(4); + } + buffer_cache.UpdateGraphicsBuffers(is_indexed); + buffer_cache.BindHostGeometryBuffers(is_indexed); + + if (!is_built.load(std::memory_order::relaxed)) { + WaitForBuild(); + } + if (assembly_programs[0].handle != 0) { + program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask); + } else { + program_manager.BindSourcePrograms(source_programs); + } + const ImageId* views_it{image_view_ids.data()}; + GLsizei texture_binding = 0; + GLsizei image_binding = 0; + std::array<GLuint, MAX_TEXTURES> textures; + std::array<GLuint, MAX_IMAGES> images; + const auto prepare_stage{[&](size_t stage) { + buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]); + buffer_cache.BindHostStageBuffers(stage); + + texture_binding += num_texture_buffers[stage]; + image_binding += num_image_buffers[stage]; + + views_it += num_texture_buffers[stage]; + views_it += num_image_buffers[stage]; + + const auto& info{stage_infos[stage]}; + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + textures[texture_binding++] = image_view.Handle(desc.type); + } + } + for (const auto& desc : info.image_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + if (desc.is_written) { + texture_cache.MarkModification(image_view.image_id); + } + images[image_binding++] = image_view.StorageView(desc.type, desc.format); + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + prepare_stage(0); + } + if constexpr (Spec::enabled_stages[1]) { + prepare_stage(1); + } + if constexpr (Spec::enabled_stages[2]) { + prepare_stage(2); + } + if constexpr (Spec::enabled_stages[3]) { + prepare_stage(3); + } + if constexpr (Spec::enabled_stages[4]) { + prepare_stage(4); + } + if (texture_binding != 0) { + ASSERT(texture_binding == sampler_binding); + glBindTextures(0, texture_binding, textures.data()); + glBindSamplers(0, sampler_binding, samplers.data()); + } + if (image_binding != 0) { + glBindImageTextures(0, image_binding, images.data()); + } +} + +void GraphicsPipeline::ConfigureTransformFeedbackImpl() const { + glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides, + xfb_streams.data(), GL_INTERLEAVED_ATTRIBS); +} + +void GraphicsPipeline::GenerateTransformFeedbackState() { + // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal + // when this is required. 
+ GLint* cursor{xfb_attribs.data()}; + GLint* current_stream{xfb_streams.data()}; + + for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { + const auto& layout = key.xfb_state.layouts[feedback]; + UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); + if (layout.varying_count == 0) { + continue; + } + *current_stream = static_cast<GLint>(feedback); + if (current_stream != xfb_streams.data()) { + // When stepping one stream, push the expected token + cursor[0] = GL_NEXT_BUFFER_NV; + cursor[1] = 0; + cursor[2] = 0; + cursor += XFB_ENTRY_STRIDE; + } + ++current_stream; + + const auto& locations = key.xfb_state.varyings[feedback]; + std::optional<u8> current_index; + for (u32 offset = 0; offset < layout.varying_count; ++offset) { + const u8 location = locations[offset]; + const u8 index = location / 4; + + if (current_index == index) { + // Increase number of components of the previous attachment + ++cursor[-2]; + continue; + } + current_index = index; + + std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location); + cursor[1] = 1; + cursor += XFB_ENTRY_STRIDE; + } + } + num_xfb_attribs = static_cast<GLsizei>((cursor - xfb_attribs.data()) / XFB_ENTRY_STRIDE); + num_xfb_strides = static_cast<GLsizei>(current_stream - xfb_streams.data()); +} + +void GraphicsPipeline::WaitForBuild() { + std::unique_lock lock{built_mutex}; + built_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h new file mode 100644 index 000000000..4e28d9a42 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -0,0 +1,169 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
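GenerateTransformFeedbackState above packs NV_transform_feedback3 attributes as {token, component count, index} triplets, inserting a GL_NEXT_BUFFER_NV triplet between buffers and merging consecutive components that fall on the same varying location by bumping the count of the previous triplet (++cursor[-2]). The following is a minimal standalone sketch of just that merging step, not part of the patch: the location list is made up, and plain integers stand in for the real GL tokens and for the third argument normally produced by TransformFeedbackEnum, so it compiles without GL headers.

#include <array>
#include <cstddef>
#include <cstdio>
#include <optional>
#include <vector>

int main() {
    // Eight consecutive component locations; each group of four shares one varying index.
    const std::array<unsigned char, 8> locations{28, 29, 30, 31, 32, 33, 34, 35};
    constexpr std::size_t entry_stride = 3; // {token, component count, index}
    std::vector<int> attribs;
    std::optional<int> current_index;
    for (const unsigned char location : locations) {
        const int index = location / 4; // four components per varying location
        if (current_index == index) {
            attribs[attribs.size() - 2] += 1; // widen the previous entry instead of adding one
            continue;
        }
        current_index = index;
        attribs.push_back(index);        // stand-in for the GL attribute token
        attribs.push_back(1);            // component count starts at one
        attribs.push_back(location % 4); // stand-in for the third token argument
    }
    for (std::size_t i = 0; i < attribs.size(); i += entry_stride) {
        std::printf("token=%d components=%d index=%d\n", attribs[i], attribs[i + 1],
                    attribs[i + 2]);
    }
    return 0;
}

With the made-up input above, the eight locations collapse into two triplets with a component count of four each, which is the compaction the real loop performs before handing the arrays to glTransformFeedbackStreamAttribsNV.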
+ +#pragma once + +#include <array> +#include <cstring> +#include <type_traits> +#include <utility> + +#include "common/bit_field.h" +#include "common/cityhash.h" +#include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" +#include "video_core/renderer_opengl/gl_buffer_cache.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" +#include "video_core/transform_feedback.h" + +namespace OpenGL { + +namespace ShaderContext { +struct Context; +} + +class Device; +class ProgramManager; + +using Maxwell = Tegra::Engines::Maxwell3D::Regs; +using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>; + +struct GraphicsPipelineKey { + std::array<u64, 6> unique_hashes; + union { + u32 raw; + BitField<0, 1, u32> xfb_enabled; + BitField<1, 1, u32> early_z; + BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology; + BitField<6, 2, Maxwell::TessellationPrimitive> tessellation_primitive; + BitField<8, 2, Maxwell::TessellationSpacing> tessellation_spacing; + BitField<10, 1, u32> tessellation_clockwise; + }; + std::array<u32, 3> padding; + VideoCommon::TransformFeedbackState xfb_state; + + size_t Hash() const noexcept { + return static_cast<size_t>(Common::CityHash64(reinterpret_cast<const char*>(this), Size())); + } + + bool operator==(const GraphicsPipelineKey& rhs) const noexcept { + return std::memcmp(this, &rhs, Size()) == 0; + } + + bool operator!=(const GraphicsPipelineKey& rhs) const noexcept { + return !operator==(rhs); + } + + [[nodiscard]] size_t Size() const noexcept { + if (xfb_enabled != 0) { + return sizeof(GraphicsPipelineKey); + } else { + return offsetof(GraphicsPipelineKey, padding); + } + } +}; +static_assert(std::has_unique_object_representations_v<GraphicsPipelineKey>); +static_assert(std::is_trivially_copyable_v<GraphicsPipelineKey>); +static_assert(std::is_trivially_constructible_v<GraphicsPipelineKey>); + +class GraphicsPipeline { +public: + explicit GraphicsPipeline(const Device& device, TextureCache& texture_cache_, + BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::Maxwell3D& maxwell3d_, + ProgramManager& program_manager_, StateTracker& state_tracker_, + ShaderWorker* thread_worker, VideoCore::ShaderNotify* shader_notify, + std::array<std::string, 5> sources, + std::array<std::vector<u32>, 5> sources_spirv, + const std::array<const Shader::Info*, 5>& infos, + const GraphicsPipelineKey& key_); + + void Configure(bool is_indexed) { + configure_func(this, is_indexed); + } + + void ConfigureTransformFeedback() const { + if (num_xfb_attribs != 0) { + ConfigureTransformFeedbackImpl(); + } + } + + [[nodiscard]] const GraphicsPipelineKey& Key() const noexcept { + return key; + } + + [[nodiscard]] bool WritesGlobalMemory() const noexcept { + return writes_global_memory; + } + + [[nodiscard]] bool IsBuilt() const noexcept { + return is_built.load(std::memory_order::relaxed); + } + + template <typename Spec> + static auto MakeConfigureSpecFunc() { + return [](GraphicsPipeline* pipeline, bool is_indexed) { + pipeline->ConfigureImpl<Spec>(is_indexed); + }; + } + +private: + template <typename Spec> + void ConfigureImpl(bool is_indexed); + + void ConfigureTransformFeedbackImpl() const; + + void GenerateTransformFeedbackState(); + + void WaitForBuild(); + + TextureCache& texture_cache; + BufferCache& buffer_cache; + Tegra::MemoryManager& gpu_memory; + 
Tegra::Engines::Maxwell3D& maxwell3d; + ProgramManager& program_manager; + StateTracker& state_tracker; + const GraphicsPipelineKey key; + + void (*configure_func)(GraphicsPipeline*, bool){}; + + std::array<OGLProgram, 5> source_programs; + std::array<OGLAssemblyProgram, 5> assembly_programs; + u32 enabled_stages_mask{}; + + std::array<Shader::Info, 5> stage_infos{}; + std::array<u32, 5> enabled_uniform_buffer_masks{}; + VideoCommon::UniformBufferSizes uniform_buffer_sizes{}; + std::array<u32, 5> base_uniform_bindings{}; + std::array<u32, 5> base_storage_bindings{}; + std::array<u32, 5> num_texture_buffers{}; + std::array<u32, 5> num_image_buffers{}; + + bool use_storage_buffers{}; + bool writes_global_memory{}; + + static constexpr std::size_t XFB_ENTRY_STRIDE = 3; + GLsizei num_xfb_attribs{}; + GLsizei num_xfb_strides{}; + std::array<GLint, 128 * XFB_ENTRY_STRIDE * Maxwell::NumTransformFeedbackBuffers> xfb_attribs{}; + std::array<GLint, Maxwell::NumTransformFeedbackBuffers> xfb_streams{}; + + std::mutex built_mutex; + std::condition_variable built_condvar; + std::atomic_bool is_built{false}; +}; + +} // namespace OpenGL + +namespace std { +template <> +struct hash<OpenGL::GraphicsPipelineKey> { + size_t operator()(const OpenGL::GraphicsPipelineKey& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index ceb3abcb2..41d2b73f4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -23,7 +23,6 @@ #include "core/memory.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_query_cache.h" @@ -40,7 +39,6 @@ namespace OpenGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; using GLvec4 = std::array<GLfloat, 4>; -using Tegra::Engines::ShaderType; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::SurfaceTarget; using VideoCore::Surface::SurfaceType; @@ -51,112 +49,11 @@ MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100)); namespace { - constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; -struct TextureHandle { - constexpr TextureHandle(u32 data, bool via_header_index) { - const Tegra::Texture::TextureHandle handle{data}; - image = handle.tic_id; - sampler = via_header_index ? 
image : handle.tsc_id.Value(); - } - - u32 image; - u32 sampler; -}; - -template <typename Engine, typename Entry> -TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, - ShaderType shader_type, size_t index = 0) { - if constexpr (std::is_same_v<Entry, SamplerEntry>) { - if (entry.is_separated) { - const u32 buffer_1 = entry.buffer; - const u32 buffer_2 = entry.secondary_buffer; - const u32 offset_1 = entry.offset; - const u32 offset_2 = entry.secondary_offset; - const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); - const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); - return TextureHandle(handle_1 | handle_2, via_header_index); - } - } - if (entry.is_bindless) { - const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); - return TextureHandle(raw, via_header_index); - } - const u32 buffer = engine.GetBoundBuffer(); - const u64 offset = (entry.offset + index) * sizeof(u32); - return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); -} - -/// Translates hardware transform feedback indices -/// @param location Hardware location -/// @return Pair of ARB_transform_feedback3 token stream first and third arguments -/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt -std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) { - const u8 index = location / 4; - if (index >= 8 && index <= 39) { - return {GL_GENERIC_ATTRIB_NV, index - 8}; - } - if (index >= 48 && index <= 55) { - return {GL_TEXTURE_COORD_NV, index - 48}; - } - switch (index) { - case 7: - return {GL_POSITION, 0}; - case 40: - return {GL_PRIMARY_COLOR_NV, 0}; - case 41: - return {GL_SECONDARY_COLOR_NV, 0}; - case 42: - return {GL_BACK_PRIMARY_COLOR_NV, 0}; - case 43: - return {GL_BACK_SECONDARY_COLOR_NV, 0}; - } - UNIMPLEMENTED_MSG("index={}", index); - return {GL_POSITION, 0}; -} - void oglEnable(GLenum cap, bool state) { (state ? glEnable : glDisable)(cap); } - -ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { - if (entry.is_buffer) { - return ImageViewType::Buffer; - } - switch (entry.type) { - case Tegra::Shader::TextureType::Texture1D: - return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; - case Tegra::Shader::TextureType::Texture2D: - return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; - case Tegra::Shader::TextureType::Texture3D: - return ImageViewType::e3D; - case Tegra::Shader::TextureType::TextureCube: - return entry.is_array ? 
ImageViewType::CubeArray : ImageViewType::Cube; - } - UNREACHABLE(); - return ImageViewType::e2D; -} - -ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { - switch (entry.type) { - case Tegra::Shader::ImageType::Texture1D: - return ImageViewType::e1D; - case Tegra::Shader::ImageType::Texture1DArray: - return ImageViewType::e1DArray; - case Tegra::Shader::ImageType::Texture2D: - return ImageViewType::e2D; - case Tegra::Shader::ImageType::Texture2DArray: - return ImageViewType::e2DArray; - case Tegra::Shader::ImageType::Texture3D: - return ImageViewType::e3D; - case Tegra::Shader::ImageType::TextureBuffer: - return ImageViewType::Buffer; - } - UNREACHABLE(); - return ImageViewType::e2D; -} - } // Anonymous namespace RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, @@ -170,14 +67,10 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), buffer_cache_runtime(device), buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), - shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), + shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache, + buffer_cache, program_manager, state_tracker, gpu.ShaderNotify()), query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), - fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), - async_shaders(emu_window_) { - if (device.UseAsynchronousShaders()) { - async_shaders.AllocateWorkers(); - } -} + fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} RasterizerOpenGL::~RasterizerOpenGL() = default; @@ -204,7 +97,7 @@ void RasterizerOpenGL::SyncVertexFormats() { const auto gl_index = static_cast<GLuint>(index); // Disable constant attributes. - if (attrib.IsConstant()) { + if (attrib.constant) { glDisableVertexAttribArray(gl_index); continue; } @@ -244,116 +137,9 @@ void RasterizerOpenGL::SyncVertexInstances() { } } -void RasterizerOpenGL::SetupShaders(bool is_indexed) { - u32 clip_distances = 0; - - std::array<Shader*, Maxwell::MaxShaderStage> shaders{}; - image_view_indices.clear(); - sampler_handles.clear(); - - texture_cache.SynchronizeGraphicsDescriptors(); - - for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - const auto& shader_config = maxwell3d.regs.shader_config[index]; - const auto program{static_cast<Maxwell::ShaderProgram>(index)}; - - // Skip stages that are not enabled - if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { - switch (program) { - case Maxwell::ShaderProgram::Geometry: - program_manager.UseGeometryShader(0); - break; - case Maxwell::ShaderProgram::Fragment: - program_manager.UseFragmentShader(0); - break; - default: - break; - } - continue; - } - // Currently this stages are not supported in the OpenGL backend. - // TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL - if (program == Maxwell::ShaderProgram::TesselationControl || - program == Maxwell::ShaderProgram::TesselationEval) { - continue; - } - - Shader* const shader = shader_cache.GetStageProgram(program, async_shaders); - const GLuint program_handle = shader->IsBuilt() ? 
shader->GetHandle() : 0; - switch (program) { - case Maxwell::ShaderProgram::VertexA: - case Maxwell::ShaderProgram::VertexB: - program_manager.UseVertexShader(program_handle); - break; - case Maxwell::ShaderProgram::Geometry: - program_manager.UseGeometryShader(program_handle); - break; - case Maxwell::ShaderProgram::Fragment: - program_manager.UseFragmentShader(program_handle); - break; - default: - UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, - shader_config.enable.Value(), shader_config.offset); - break; - } - - // Stage indices are 0 - 5 - const size_t stage = index == 0 ? 0 : index - 1; - shaders[stage] = shader; - - SetupDrawTextures(shader, stage); - SetupDrawImages(shader, stage); - - buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers); - - buffer_cache.UnbindGraphicsStorageBuffers(stage); - u32 ssbo_index = 0; - for (const auto& buffer : shader->GetEntries().global_memory_entries) { - buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, - buffer.cbuf_offset, buffer.is_written); - ++ssbo_index; - } - - // Workaround for Intel drivers. - // When a clip distance is enabled but not set in the shader it crops parts of the screen - // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the - // clip distances only when it's written by a shader stage. - clip_distances |= shader->GetEntries().clip_distances; - - // When VertexA is enabled, we have dual vertex shaders - if (program == Maxwell::ShaderProgram::VertexA) { - // VertexB was combined with VertexA, so we skip the VertexB iteration - ++index; - } - } - SyncClipEnabled(clip_distances); - maxwell3d.dirty.flags[Dirty::Shaders] = false; - - buffer_cache.UpdateGraphicsBuffers(is_indexed); - - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); - - buffer_cache.BindHostGeometryBuffers(is_indexed); - - size_t image_view_index = 0; - size_t texture_index = 0; - size_t image_index = 0; - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - const Shader* const shader = shaders[stage]; - if (!shader) { - continue; - } - buffer_cache.BindHostStageBuffers(stage); - const auto& base = device.GetBaseBindings(stage); - BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index, - texture_index, image_index); - } -} - void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading, const VideoCore::DiskResourceLoadCallback& callback) { - shader_cache.LoadDiskCache(title_id, stop_loading, callback); + shader_cache.LoadDiskResources(title_id, stop_loading, callback); } void RasterizerOpenGL::Clear() { @@ -432,16 +218,15 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { SyncState(); - // Setup shaders and their used resources. 
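In the new Draw() path just below, shader_cache.CurrentGraphicsPipeline() can return nullptr while a pipeline is still being built on a worker thread, and the draw is simply skipped for that call; the actual heuristics (depth usage, small index counts) live in ShaderCache::BuiltPipeline later in this patch. Here is a minimal sketch of that control flow only, with toy stand-in types rather than the real yuzu classes and everything reduced to a single readiness flag; it is illustrative, not part of the patch.

#include <atomic>
#include <cstdio>

// Stand-in for the real GraphicsPipeline; only the "is it built yet?" flag matters here.
struct ToyPipeline {
    std::atomic_bool is_built{false};
};

// Mirrors the spirit of ShaderCache::BuiltPipeline: hand back the pipeline only when it is
// ready, or nullptr so the caller skips the draw while an asynchronous build finishes.
ToyPipeline* CurrentPipeline(ToyPipeline& cached, bool use_asynchronous_shaders) {
    if (cached.is_built.load(std::memory_order_relaxed)) {
        return &cached;
    }
    return use_asynchronous_shaders ? nullptr : &cached; // synchronous path waits instead
}

void Draw(ToyPipeline& cached, bool use_asynchronous_shaders) {
    ToyPipeline* const pipeline = CurrentPipeline(cached, use_asynchronous_shaders);
    if (!pipeline) {
        std::printf("pipeline still compiling, draw skipped this call\n");
        return;
    }
    std::printf("pipeline ready, configuring and drawing\n");
}

int main() {
    ToyPipeline pipeline;
    Draw(pipeline, true);                                  // skipped: async build pending
    pipeline.is_built.store(true, std::memory_order_relaxed);
    Draw(pipeline, true);                                  // now issued
    return 0;
}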
+ GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()}; + if (!pipeline) { + return; + } std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - SetupShaders(is_indexed); - - texture_cache.UpdateRenderTargets(false); - state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); - program_manager.BindGraphicsPipeline(); + pipeline->Configure(is_indexed); const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); - BeginTransformFeedback(primitive_mode); + BeginTransformFeedback(pipeline, primitive_mode); const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance); const GLsizei num_instances = @@ -480,35 +265,24 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { num_instances, base_instance); } } - EndTransformFeedback(); ++num_queued_commands; + has_written_global_memory |= pipeline->WritesGlobalMemory(); gpu.TickWork(); } -void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { - Shader* const kernel = shader_cache.GetComputeKernel(code_addr); - - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - BindComputeTextures(kernel); - - const auto& entries = kernel->GetEntries(); - buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); - buffer_cache.UnbindComputeStorageBuffers(); - u32 ssbo_index = 0; - for (const auto& buffer : entries.global_memory_entries) { - buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, - buffer.is_written); - ++ssbo_index; - } - buffer_cache.UpdateComputeBuffers(); - buffer_cache.BindHostComputeBuffers(); - - const auto& launch_desc = kepler_compute.launch_description; - glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); +void RasterizerOpenGL::DispatchCompute() { + ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()}; + if (!pipeline) { + return; + } + pipeline->Configure(); + const auto& qmd{kepler_compute.launch_description}; + glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z); ++num_queued_commands; + has_written_global_memory |= pipeline->WritesGlobalMemory(); } void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { @@ -661,7 +435,7 @@ void RasterizerOpenGL::WaitForIdle() { } void RasterizerOpenGL::FragmentBarrier() { - glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT); + glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT); } void RasterizerOpenGL::TiledCacheBarrier() { @@ -674,6 +448,13 @@ void RasterizerOpenGL::FlushCommands() { return; } num_queued_commands = 0; + + // Make sure memory stored from the previous GL command stream is visible + // This is only needed on assembly shaders where we write to GPU memory with raw pointers + if (has_written_global_memory) { + has_written_global_memory = false; + glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); + } glFlush(); } @@ -721,111 +502,11 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different"); // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different"); - screen_info.display_texture = image_view->Handle(ImageViewType::e2D); + screen_info.display_texture = image_view->Handle(Shader::TextureType::Color2D); screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); return true; } -void RasterizerOpenGL::BindComputeTextures(Shader* kernel) { - 
image_view_indices.clear(); - sampler_handles.clear(); - - texture_cache.SynchronizeComputeDescriptors(); - - SetupComputeTextures(kernel); - SetupComputeImages(kernel); - - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - texture_cache.FillComputeImageViews(indices_span, image_view_ids); - - program_manager.BindCompute(kernel->GetHandle()); - size_t image_view_index = 0; - size_t texture_index = 0; - size_t image_index = 0; - BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index); -} - -void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture, - GLuint base_image, size_t& image_view_index, - size_t& texture_index, size_t& image_index) { - const GLuint* const samplers = sampler_handles.data() + texture_index; - const GLuint* const textures = texture_handles.data() + texture_index; - const GLuint* const images = image_handles.data() + image_index; - - const size_t num_samplers = entries.samplers.size(); - for (const auto& sampler : entries.samplers) { - for (size_t i = 0; i < sampler.size; ++i) { - const ImageViewId image_view_id = image_view_ids[image_view_index++]; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler)); - texture_handles[texture_index++] = handle; - } - } - const size_t num_images = entries.images.size(); - for (size_t unit = 0; unit < num_images; ++unit) { - // TODO: Mark as modified - const ImageViewId image_view_id = image_view_ids[image_view_index++]; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit])); - image_handles[image_index] = handle; - ++image_index; - } - if (num_samplers > 0) { - glBindSamplers(base_texture, static_cast<GLsizei>(num_samplers), samplers); - glBindTextures(base_texture, static_cast<GLsizei>(num_samplers), textures); - } - if (num_images > 0) { - glBindImageTextures(base_image, static_cast<GLsizei>(num_images), images); - } -} - -void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) { - const bool via_header_index = - maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : shader->GetEntries().samplers) { - const auto shader_type = static_cast<ShaderType>(stage_index); - for (size_t index = 0; index < entry.size; ++index) { - const auto handle = - GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index); - const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); - image_view_indices.push_back(handle.image); - } - } -} - -void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : kernel->GetEntries().samplers) { - for (size_t i = 0; i < entry.size; ++i) { - const auto handle = - GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i); - const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); - image_view_indices.push_back(handle.image); - } - } -} - -void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) { - const bool via_header_index = - maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : 
shader->GetEntries().images) { - const auto shader_type = static_cast<ShaderType>(stage_index); - const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerOpenGL::SetupComputeImages(const Shader* shader) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : shader->GetEntries().images) { - const auto handle = - GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute); - image_view_indices.push_back(handle.image); - } -} - void RasterizerOpenGL::SyncState() { SyncViewport(); SyncRasterizeEnable(); @@ -941,7 +622,7 @@ void RasterizerOpenGL::SyncDepthClamp() { void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) { auto& flags = maxwell3d.dirty.flags; - if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) { + if (!flags[Dirty::ClipDistances] && !flags[VideoCommon::Dirty::Shaders]) { return; } flags[Dirty::ClipDistances] = false; @@ -1318,68 +999,13 @@ void RasterizerOpenGL::SyncFramebufferSRGB() { oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d.regs.framebuffer_srgb); } -void RasterizerOpenGL::SyncTransformFeedback() { - // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal - // when this is required. - const auto& regs = maxwell3d.regs; - - static constexpr std::size_t STRIDE = 3; - std::array<GLint, 128 * STRIDE * Maxwell::NumTransformFeedbackBuffers> attribs; - std::array<GLint, Maxwell::NumTransformFeedbackBuffers> streams; - - GLint* cursor = attribs.data(); - GLint* current_stream = streams.data(); - - for (std::size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { - const auto& layout = regs.tfb_layouts[feedback]; - UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); - if (layout.varying_count == 0) { - continue; - } - - *current_stream = static_cast<GLint>(feedback); - if (current_stream != streams.data()) { - // When stepping one stream, push the expected token - cursor[0] = GL_NEXT_BUFFER_NV; - cursor[1] = 0; - cursor[2] = 0; - cursor += STRIDE; - } - ++current_stream; - - const auto& locations = regs.tfb_varying_locs[feedback]; - std::optional<u8> current_index; - for (u32 offset = 0; offset < layout.varying_count; ++offset) { - const u8 location = locations[offset]; - const u8 index = location / 4; - - if (current_index == index) { - // Increase number of components of the previous attachment - ++cursor[-2]; - continue; - } - current_index = index; - - std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location); - cursor[1] = 1; - cursor += STRIDE; - } - } - - const GLsizei num_attribs = static_cast<GLsizei>((cursor - attribs.data()) / STRIDE); - const GLsizei num_strides = static_cast<GLsizei>(current_stream - streams.data()); - glTransformFeedbackStreamAttribsNV(num_attribs, attribs.data(), num_strides, streams.data(), - GL_INTERLEAVED_ATTRIBS); -} - -void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { +void RasterizerOpenGL::BeginTransformFeedback(GraphicsPipeline* program, GLenum primitive_mode) { const auto& regs = maxwell3d.regs; if (regs.tfb_enabled == 0) { return; } - if (device.UseAssemblyShaders()) { - SyncTransformFeedback(); - } + program->ConfigureTransformFeedback(); + UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || 
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); @@ -1393,11 +1019,9 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { } void RasterizerOpenGL::EndTransformFeedback() { - const auto& regs = maxwell3d.regs; - if (regs.tfb_enabled == 0) { - return; + if (maxwell3d.regs.tfb_enabled != 0) { + glEndTransformFeedback(); } - glEndTransformFeedback(); } AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {} diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d30ad698f..d0397b745 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -28,11 +28,9 @@ #include "video_core/renderer_opengl/gl_query_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/gl_texture_cache.h" -#include "video_core/shader/async_shaders.h" #include "video_core/textures/texture.h" namespace Core::Memory { @@ -81,7 +79,7 @@ public: void Draw(bool is_indexed, bool is_instanced) override; void Clear() override; - void DispatchCompute(GPUVAddr code_addr) override; + void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; @@ -118,36 +116,11 @@ public: return num_queued_commands > 0; } - VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { - return async_shaders; - } - - const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { - return async_shaders; - } - private: static constexpr size_t MAX_TEXTURES = 192; static constexpr size_t MAX_IMAGES = 48; static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES; - void BindComputeTextures(Shader* kernel); - - void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image, - size_t& image_view_index, size_t& texture_index, size_t& image_index); - - /// Configures the current textures to use for the draw command. - void SetupDrawTextures(const Shader* shader, size_t stage_index); - - /// Configures the textures used in a compute shader. - void SetupComputeTextures(const Shader* kernel); - - /// Configures images in a graphics shader. - void SetupDrawImages(const Shader* shader, size_t stage_index); - - /// Configures images in a compute shader. 
- void SetupComputeImages(const Shader* shader); - /// Syncs state to match guest's void SyncState(); @@ -220,18 +193,12 @@ private: /// Syncs vertex instances to match the guest state void SyncVertexInstances(); - /// Syncs transform feedback state to match guest state - /// @note Only valid on assembly shaders - void SyncTransformFeedback(); - /// Begin a transform feedback - void BeginTransformFeedback(GLenum primitive_mode); + void BeginTransformFeedback(GraphicsPipeline* pipeline, GLenum primitive_mode); /// End a transform feedback void EndTransformFeedback(); - void SetupShaders(bool is_indexed); - Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; @@ -246,13 +213,11 @@ private: TextureCache texture_cache; BufferCacheRuntime buffer_cache_runtime; BufferCache buffer_cache; - ShaderCacheOpenGL shader_cache; + ShaderCache shader_cache; QueryCache query_cache; AccelerateDMA accelerate_dma; FenceManagerOpenGL fence_manager; - VideoCommon::Shader::AsyncShaders async_shaders; - boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles; @@ -260,7 +225,8 @@ private: std::array<GLuint, MAX_IMAGES> image_handles{}; /// Number of commands queued to the OpenGL driver. Resetted on flush. - std::size_t num_queued_commands = 0; + size_t num_queued_commands = 0; + bool has_written_global_memory = false; u32 last_clip_distance_mask = 0; }; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 3428e5e21..8695c29e3 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -83,18 +83,6 @@ void OGLSampler::Release() { handle = 0; } -void OGLShader::Create(std::string_view source, GLenum type) { - if (handle != 0) { - return; - } - if (source.empty()) { - return; - } - - MICROPROFILE_SCOPE(OpenGL_ResourceCreation); - handle = GLShader::LoadShader(source, type); -} - void OGLShader::Release() { if (handle == 0) return; @@ -104,21 +92,6 @@ void OGLShader::Release() { handle = 0; } -void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shader, - const char* frag_shader, bool separable_program, - bool hint_retrievable) { - OGLShader vert, geo, frag; - if (vert_shader) - vert.Create(vert_shader, GL_VERTEX_SHADER); - if (geo_shader) - geo.Create(geo_shader, GL_GEOMETRY_SHADER); - if (frag_shader) - frag.Create(frag_shader, GL_FRAGMENT_SHADER); - - MICROPROFILE_SCOPE(OpenGL_ResourceCreation); - Create(separable_program, hint_retrievable, vert.handle, geo.handle, frag.handle); -} - void OGLProgram::Release() { if (handle == 0) return; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 552d79db4..b2d5bfd3b 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -8,7 +8,6 @@ #include <utility> #include <glad/glad.h> #include "common/common_types.h" -#include "video_core/renderer_opengl/gl_shader_util.h" namespace OpenGL { @@ -128,8 +127,6 @@ public: return *this; } - void Create(std::string_view source, GLenum type); - void Release(); GLuint handle = 0; @@ -151,17 +148,6 @@ public: return *this; } - template <typename... T> - void Create(bool separable_program, bool hint_retrievable, T... 
shaders) { - if (handle != 0) - return; - handle = GLShader::LoadProgram(separable_program, hint_retrievable, shaders...); - } - - /// Creates a new internal OpenGL resource and stores the handle - void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader, - bool separable_program = false, bool hint_retrievable = false); - /// Deletes the internal OpenGL resource void Release(); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 5a01c59ec..8d6cc074c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -3,606 +3,544 @@ // Refer to the license.txt file included. #include <atomic> +#include <fstream> #include <functional> #include <mutex> -#include <optional> #include <string> #include <thread> -#include <unordered_set> #include "common/alignment.h" #include "common/assert.h" +#include "common/fs/fs.h" +#include "common/fs/path_util.h" #include "common/logging/log.h" #include "common/scope_exit.h" +#include "common/settings.h" +#include "common/thread_worker.h" #include "core/core.h" -#include "core/frontend/emu_window.h" +#include "shader_recompiler/backend/glasm/emit_glasm.h" +#include "shader_recompiler/backend/glsl/emit_glsl.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/frontend/maxwell/translate_program.h" +#include "shader_recompiler/profile.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" -#include "video_core/renderer_opengl/gl_arb_decompiler.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/renderer_opengl/gl_shader_disk_cache.h" +#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_state_tracker.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" #include "video_core/shader_cache.h" +#include "video_core/shader_environment.h" #include "video_core/shader_notify.h" namespace OpenGL { - -using Tegra::Engines::ShaderType; -using VideoCommon::Shader::GetShaderAddress; -using VideoCommon::Shader::GetShaderCode; -using VideoCommon::Shader::GetUniqueIdentifier; -using VideoCommon::Shader::KERNEL_MAIN_OFFSET; -using VideoCommon::Shader::ProgramCode; -using VideoCommon::Shader::Registry; -using VideoCommon::Shader::ShaderIR; -using VideoCommon::Shader::STAGE_MAIN_OFFSET; - namespace { - -constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{}; - -/// Gets the shader type from a Maxwell program type -constexpr GLenum GetGLShaderType(ShaderType shader_type) { - switch (shader_type) { - case ShaderType::Vertex: - return GL_VERTEX_SHADER; - case ShaderType::Geometry: - return GL_GEOMETRY_SHADER; - case ShaderType::Fragment: - return GL_FRAGMENT_SHADER; - case ShaderType::Compute: - return GL_COMPUTE_SHADER; - default: - return GL_NONE; - } +using Shader::Backend::GLASM::EmitGLASM; +using Shader::Backend::GLSL::EmitGLSL; +using Shader::Backend::SPIRV::EmitSPIRV; +using 
Shader::Maxwell::MergeDualVertexPrograms; +using Shader::Maxwell::TranslateProgram; +using VideoCommon::ComputeEnvironment; +using VideoCommon::FileEnvironment; +using VideoCommon::GenericEnvironment; +using VideoCommon::GraphicsEnvironment; +using VideoCommon::LoadPipelines; +using VideoCommon::SerializePipeline; +using Context = ShaderContext::Context; + +constexpr u32 CACHE_VERSION = 5; + +template <typename Container> +auto MakeSpan(Container& container) { + return std::span(container.data(), container.size()); } -constexpr const char* GetShaderTypeName(ShaderType shader_type) { - switch (shader_type) { - case ShaderType::Vertex: - return "VS"; - case ShaderType::TesselationControl: - return "HS"; - case ShaderType::TesselationEval: - return "DS"; - case ShaderType::Geometry: - return "GS"; - case ShaderType::Fragment: - return "FS"; - case ShaderType::Compute: - return "CS"; - } - return "UNK"; +Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, + const Shader::IR::Program& program, + const Shader::IR::Program* previous_program, + bool glasm_use_storage_buffers, bool use_assembly_shaders) { + Shader::RuntimeInfo info; + if (previous_program) { + info.previous_stage_stores = previous_program->info.stores; + } else { + // Mark all stores as available for vertex shaders + info.previous_stage_stores.mask.set(); + } + switch (program.stage) { + case Shader::Stage::VertexB: + case Shader::Stage::Geometry: + if (!use_assembly_shaders && key.xfb_enabled != 0) { + info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state); + } + break; + case Shader::Stage::TessellationEval: + info.tess_clockwise = key.tessellation_clockwise != 0; + info.tess_primitive = [&key] { + switch (key.tessellation_primitive) { + case Maxwell::TessellationPrimitive::Isolines: + return Shader::TessPrimitive::Isolines; + case Maxwell::TessellationPrimitive::Triangles: + return Shader::TessPrimitive::Triangles; + case Maxwell::TessellationPrimitive::Quads: + return Shader::TessPrimitive::Quads; + } + UNREACHABLE(); + return Shader::TessPrimitive::Triangles; + }(); + info.tess_spacing = [&] { + switch (key.tessellation_spacing) { + case Maxwell::TessellationSpacing::Equal: + return Shader::TessSpacing::Equal; + case Maxwell::TessellationSpacing::FractionalOdd: + return Shader::TessSpacing::FractionalOdd; + case Maxwell::TessellationSpacing::FractionalEven: + return Shader::TessSpacing::FractionalEven; + } + UNREACHABLE(); + return Shader::TessSpacing::Equal; + }(); + break; + case Shader::Stage::Fragment: + info.force_early_z = key.early_z != 0; + break; + default: + break; + } + switch (key.gs_input_topology) { + case Maxwell::PrimitiveTopology::Points: + info.input_topology = Shader::InputTopology::Points; + break; + case Maxwell::PrimitiveTopology::Lines: + case Maxwell::PrimitiveTopology::LineLoop: + case Maxwell::PrimitiveTopology::LineStrip: + info.input_topology = Shader::InputTopology::Lines; + break; + case Maxwell::PrimitiveTopology::Triangles: + case Maxwell::PrimitiveTopology::TriangleStrip: + case Maxwell::PrimitiveTopology::TriangleFan: + case Maxwell::PrimitiveTopology::Quads: + case Maxwell::PrimitiveTopology::QuadStrip: + case Maxwell::PrimitiveTopology::Polygon: + case Maxwell::PrimitiveTopology::Patches: + info.input_topology = Shader::InputTopology::Triangles; + break; + case Maxwell::PrimitiveTopology::LinesAdjacency: + case Maxwell::PrimitiveTopology::LineStripAdjacency: + info.input_topology = Shader::InputTopology::LinesAdjacency; + break; + case 
Maxwell::PrimitiveTopology::TrianglesAdjacency: + case Maxwell::PrimitiveTopology::TriangleStripAdjacency: + info.input_topology = Shader::InputTopology::TrianglesAdjacency; + break; + } + info.glasm_use_storage_buffers = glasm_use_storage_buffers; + return info; } -constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) { - switch (program_type) { - case Maxwell::ShaderProgram::VertexA: - case Maxwell::ShaderProgram::VertexB: - return ShaderType::Vertex; - case Maxwell::ShaderProgram::TesselationControl: - return ShaderType::TesselationControl; - case Maxwell::ShaderProgram::TesselationEval: - return ShaderType::TesselationEval; - case Maxwell::ShaderProgram::Geometry: - return ShaderType::Geometry; - case Maxwell::ShaderProgram::Fragment: - return ShaderType::Fragment; - } - return {}; +void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs) { + std::ranges::transform(regs.tfb_layouts, state.layouts.begin(), [](const auto& layout) { + return VideoCommon::TransformFeedbackState::Layout{ + .stream = layout.stream, + .varying_count = layout.varying_count, + .stride = layout.stride, + }; + }); + state.varyings = regs.tfb_varying_locs; } +} // Anonymous namespace -constexpr GLenum AssemblyEnum(ShaderType shader_type) { - switch (shader_type) { - case ShaderType::Vertex: - return GL_VERTEX_PROGRAM_NV; - case ShaderType::TesselationControl: - return GL_TESS_CONTROL_PROGRAM_NV; - case ShaderType::TesselationEval: - return GL_TESS_EVALUATION_PROGRAM_NV; - case ShaderType::Geometry: - return GL_GEOMETRY_PROGRAM_NV; - case ShaderType::Fragment: - return GL_FRAGMENT_PROGRAM_NV; - case ShaderType::Compute: - return GL_COMPUTE_PROGRAM_NV; +ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, const Device& device_, + TextureCache& texture_cache_, BufferCache& buffer_cache_, + ProgramManager& program_manager_, StateTracker& state_tracker_, + VideoCore::ShaderNotify& shader_notify_) + : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, + emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_}, + buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{state_tracker_}, + shader_notify{shader_notify_}, use_asynchronous_shaders{device.UseAsynchronousShaders()}, + profile{ + .supported_spirv = 0x00010000, + + .unified_descriptor_binding = false, + .support_descriptor_aliasing = false, + .support_int8 = false, + .support_int16 = false, + .support_int64 = device.HasShaderInt64(), + .support_vertex_instance_id = true, + .support_float_controls = false, + .support_separate_denorm_behavior = false, + .support_separate_rounding_mode = false, + .support_fp16_denorm_preserve = false, + .support_fp32_denorm_preserve = false, + .support_fp16_denorm_flush = false, + .support_fp32_denorm_flush = false, + .support_fp16_signed_zero_nan_preserve = false, + .support_fp32_signed_zero_nan_preserve = false, + .support_fp64_signed_zero_nan_preserve = false, + .support_explicit_workgroup_layout = false, + .support_vote = true, + .support_viewport_index_layer_non_geometry = + device.HasNvViewportArray2() || device.HasVertexViewportLayer(), + .support_viewport_mask = device.HasNvViewportArray2(), + .support_typeless_image_loads = device.HasImageLoadFormatted(), + .support_demote_to_helper_invocation = false, + .support_int64_atomics = false, + 
.support_derivative_control = device.HasDerivativeControl(), + .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(), + .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), + .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), + .support_gl_texture_shadow_lod = device.HasTextureShadowLod(), + .support_gl_warp_intrinsics = false, + .support_gl_variable_aoffi = device.HasVariableAoffi(), + .support_gl_sparse_textures = device.HasSparseTexture2(), + .support_gl_derivative_control = device.HasDerivativeControl(), + + .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(), + + .lower_left_origin_mode = true, + .need_declared_frag_colors = true, + .need_fastmath_off = device.NeedsFastmathOff(), + + .has_broken_spirv_clamp = true, + .has_broken_unsigned_image_offsets = true, + .has_broken_signed_operations = true, + .has_broken_fp16_float_controls = false, + .has_gl_component_indexing_bug = device.HasComponentIndexingBug(), + .has_gl_precise_bug = device.HasPreciseBug(), + .ignore_nan_fp_comparisons = true, + .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(), + }, + host_info{ + .support_float16 = false, + .support_int64 = device.HasShaderInt64(), + } { + if (use_asynchronous_shaders) { + workers = CreateWorkers(); } - return {}; } -std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) { - return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); -} +ShaderCache::~ShaderCache() = default; -std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { - const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size}; - const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer, - entry.graphics_info, entry.compute_info}; - auto registry = std::make_shared<Registry>(entry.type, info); - for (const auto& [address, value] : entry.keys) { - const auto [buffer, offset] = address; - registry->InsertKey(buffer, offset, value); - } - for (const auto& [offset, sampler] : entry.bound_samplers) { - registry->InsertBoundSampler(offset, sampler); +void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) { + if (title_id == 0) { + return; } - for (const auto& [key, sampler] : entry.bindless_samplers) { - const auto [buffer, offset] = key; - registry->InsertBindlessSampler(buffer, offset, sampler); + const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; + const auto base_dir{shader_dir / fmt::format("{:016x}", title_id)}; + if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) { + LOG_ERROR(Common_Filesystem, "Failed to create shader cache directories"); + return; } - return registry; -} - -std::unordered_set<GLenum> GetSupportedFormats() { - GLint num_formats; - glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); + shader_cache_filename = base_dir / "opengl.bin"; + + if (!workers) { + workers = CreateWorkers(); + } + struct { + std::mutex mutex; + size_t total{}; + size_t built{}; + bool has_loaded{}; + } state; + + const auto load_compute{[&](std::ifstream& file, FileEnvironment env) { + ComputePipelineKey key; + file.read(reinterpret_cast<char*>(&key), sizeof(key)); + workers->QueueWork( + [this, key, env = std::move(env), &state, &callback](Context* ctx) mutable { + ctx->pools.ReleaseContents(); + auto pipeline{CreateComputePipeline(ctx->pools, key, env)}; + std::lock_guard 
lock{state.mutex}; + if (pipeline) { + compute_cache.emplace(key, std::move(pipeline)); + } + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } + }); + ++state.total; + }}; + const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) { + GraphicsPipelineKey key; + file.read(reinterpret_cast<char*>(&key), sizeof(key)); + workers->QueueWork( + [this, key, envs = std::move(envs), &state, &callback](Context* ctx) mutable { + boost::container::static_vector<Shader::Environment*, 5> env_ptrs; + for (auto& env : envs) { + env_ptrs.push_back(&env); + } + ctx->pools.ReleaseContents(); + auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false)}; + std::lock_guard lock{state.mutex}; + if (pipeline) { + graphics_cache.emplace(key, std::move(pipeline)); + } + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } + }); + ++state.total; + }}; + LoadPipelines(stop_loading, shader_cache_filename, CACHE_VERSION, load_compute, load_graphics); - std::vector<GLint> formats(num_formats); - glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); + std::unique_lock lock{state.mutex}; + callback(VideoCore::LoadCallbackStage::Build, 0, state.total); + state.has_loaded = true; + lock.unlock(); - std::unordered_set<GLenum> supported_formats; - for (const GLint format : formats) { - supported_formats.insert(static_cast<GLenum>(format)); + workers->WaitForRequests(); + if (!use_asynchronous_shaders) { + workers.reset(); } - return supported_formats; } -} // Anonymous namespace - -ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier, - const ShaderIR& ir, const Registry& registry, bool hint_retrievable) { - if (device.UseDriverCache()) { - // Ignore hint retrievable if we are using the driver cache - hint_retrievable = false; - } - const std::string shader_id = MakeShaderID(unique_identifier, shader_type); - LOG_INFO(Render_OpenGL, "{}", shader_id); - - auto program = std::make_shared<ProgramHandle>(); - - if (device.UseAssemblyShaders()) { - const std::string arb = - DecompileAssemblyShader(device, ir, registry, shader_type, shader_id); - - GLuint& arb_prog = program->assembly_program.handle; - -// Commented out functions signal OpenGL errors but are compatible with apitrace. -// Use them only to capture and replay on apitrace. 
-#if 0 - glGenProgramsNV(1, &arb_prog); - glLoadProgramNV(AssemblyEnum(shader_type), arb_prog, static_cast<GLsizei>(arb.size()), - reinterpret_cast<const GLubyte*>(arb.data())); -#else - glGenProgramsARB(1, &arb_prog); - glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB, - static_cast<GLsizei>(arb.size()), arb.data()); -#endif - const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV)); - if (err && *err) { - LOG_CRITICAL(Render_OpenGL, "{}", err); - LOG_INFO(Render_OpenGL, "\n{}", arb); - } - } else { - const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id); - OGLShader shader; - shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); - - program->source_program.Create(true, hint_retrievable, shader.handle); - } - - return program; +GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { + if (!RefreshStages(graphics_key.unique_hashes)) { + current_pipeline = nullptr; + return nullptr; + } + const auto& regs{maxwell3d.regs}; + graphics_key.raw = 0; + graphics_key.early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); + graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0 + ? regs.draw.topology.Value() + : Maxwell::PrimitiveTopology{}); + graphics_key.tessellation_primitive.Assign(regs.tess_mode.prim.Value()); + graphics_key.tessellation_spacing.Assign(regs.tess_mode.spacing.Value()); + graphics_key.tessellation_clockwise.Assign(regs.tess_mode.cw.Value()); + graphics_key.xfb_enabled.Assign(regs.tfb_enabled != 0 ? 1 : 0); + if (graphics_key.xfb_enabled) { + SetXfbState(graphics_key.xfb_state, regs); + } + if (current_pipeline && graphics_key == current_pipeline->Key()) { + return BuiltPipeline(current_pipeline); + } + return CurrentGraphicsPipelineSlowPath(); } -Shader::Shader(std::shared_ptr<Registry> registry_, ShaderEntries entries_, - ProgramSharedPtr program_, bool is_built_) - : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)}, - is_built{is_built_} { - handle = program->assembly_program.handle; - if (handle == 0) { - handle = program->source_program.handle; +GraphicsPipeline* ShaderCache::CurrentGraphicsPipelineSlowPath() { + const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; + auto& pipeline{pair->second}; + if (is_new) { + pipeline = CreateGraphicsPipeline(); } - if (is_built) { - ASSERT(handle != 0); + if (!pipeline) { + return nullptr; } + current_pipeline = pipeline.get(); + return BuiltPipeline(current_pipeline); } -Shader::~Shader() = default; - -GLuint Shader::GetHandle() const { - DEBUG_ASSERT(registry->IsConsistent()); - return handle; -} - -bool Shader::IsBuilt() const { - return is_built; -} - -void Shader::AsyncOpenGLBuilt(OGLProgram new_program) { - program->source_program = std::move(new_program); - handle = program->source_program.handle; - is_built = true; -} - -void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) { - program->assembly_program = std::move(new_program); - handle = program->assembly_program.handle; - is_built = true; -} - -std::unique_ptr<Shader> Shader::CreateStageFromMemory( - const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code, - ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) { - const auto shader_type = GetShaderType(program_type); - - auto& gpu = params.gpu; - gpu.ShaderNotify().MarkSharderBuilding(); - - auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D()); 
- if (!async_shaders.IsShaderAsync(gpu) || !params.device.UseAsynchronousShaders()) { - const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); - // TODO(Rodrigo): Handle VertexA shaders - // std::optional<ShaderIR> ir_b; - // if (!code_b.empty()) { - // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); - // } - auto program = - BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); - ShaderDiskCacheEntry entry; - entry.type = shader_type; - entry.code = std::move(code); - entry.code_b = std::move(code_b); - entry.unique_identifier = params.unique_identifier; - entry.bound_buffer = registry->GetBoundBuffer(); - entry.graphics_info = registry->GetGraphicsInfo(); - entry.keys = registry->GetKeys(); - entry.bound_samplers = registry->GetBoundSamplers(); - entry.bindless_samplers = registry->GetBindlessSamplers(); - params.disk_cache.SaveEntry(std::move(entry)); - - gpu.ShaderNotify().MarkShaderComplete(); - - return std::unique_ptr<Shader>(new Shader(std::move(registry), - MakeEntries(params.device, ir, shader_type), - std::move(program), true)); - } else { - // Required for entries - const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); - auto entries = MakeEntries(params.device, ir, shader_type); - - async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier, - std::move(code), std::move(code_b), STAGE_MAIN_OFFSET, - COMPILER_SETTINGS, *registry, cpu_addr); - - auto program = std::make_shared<ProgramHandle>(); - return std::unique_ptr<Shader>( - new Shader(std::move(registry), std::move(entries), std::move(program), false)); +GraphicsPipeline* ShaderCache::BuiltPipeline(GraphicsPipeline* pipeline) const noexcept { + if (pipeline->IsBuilt()) { + return pipeline; } -} - -std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params, - ProgramCode code) { - auto& gpu = params.gpu; - gpu.ShaderNotify().MarkSharderBuilding(); - - auto registry = std::make_shared<Registry>(ShaderType::Compute, params.engine); - const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); - const u64 uid = params.unique_identifier; - auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry); - - ShaderDiskCacheEntry entry; - entry.type = ShaderType::Compute; - entry.code = std::move(code); - entry.unique_identifier = uid; - entry.bound_buffer = registry->GetBoundBuffer(); - entry.compute_info = registry->GetComputeInfo(); - entry.keys = registry->GetKeys(); - entry.bound_samplers = registry->GetBoundSamplers(); - entry.bindless_samplers = registry->GetBindlessSamplers(); - params.disk_cache.SaveEntry(std::move(entry)); - - gpu.ShaderNotify().MarkShaderComplete(); - - return std::unique_ptr<Shader>(new Shader(std::move(registry), - MakeEntries(params.device, ir, ShaderType::Compute), - std::move(program))); -} - -std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params, - const PrecompiledShader& precompiled_shader) { - return std::unique_ptr<Shader>(new Shader( - precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program)); -} - -ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, - Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const Device& device_) - : ShaderCache{rasterizer_}, emu_window{emu_window_}, gpu{gpu_}, gpu_memory{gpu_memory_}, - maxwell3d{maxwell3d_}, 
kepler_compute{kepler_compute_}, device{device_} {} - -ShaderCacheOpenGL::~ShaderCacheOpenGL() = default; - -void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, std::stop_token stop_loading, - const VideoCore::DiskResourceLoadCallback& callback) { - disk_cache.BindTitleID(title_id); - const std::optional transferable = disk_cache.LoadTransferable(); - - LOG_INFO(Render_OpenGL, "Total Shader Count: {}", - transferable.has_value() ? transferable->size() : 0); - - if (!transferable) { - return; + if (!use_asynchronous_shaders) { + return pipeline; } - - std::vector<ShaderDiskCachePrecompiled> gl_cache; - if (!device.UseAssemblyShaders() && !device.UseDriverCache()) { - // Only load precompiled cache when we are not using assembly shaders - gl_cache = disk_cache.LoadPrecompiled(); + // If something is using depth, we can assume that games are not rendering anything which + // will be used one time. + if (maxwell3d.regs.zeta_enable) { + return nullptr; } - const auto supported_formats = GetSupportedFormats(); - - // Track if precompiled cache was altered during loading to know if we have to - // serialize the virtual precompiled cache file back to the hard drive - bool precompiled_cache_altered = false; - - // Inform the frontend about shader build initialization - if (callback) { - callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size()); + // If games are using a small index count, we can assume these are full screen quads. + // Usually these shaders are only used once for building textures so we can assume they + // can't be built async + if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) { + return pipeline; } + return nullptr; +} - std::mutex mutex; - std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex - std::atomic_bool gl_cache_failed = false; - - const auto find_precompiled = [&gl_cache](u64 id) { - return std::ranges::find(gl_cache, id, &ShaderDiskCachePrecompiled::unique_identifier); - }; - - const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, - std::size_t end) { - const auto scope = context->Acquire(); - - for (std::size_t i = begin; i < end; ++i) { - if (stop_loading.stop_requested()) { - return; - } - const auto& entry = (*transferable)[i]; - const u64 uid = entry.unique_identifier; - const auto it = find_precompiled(uid); - const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr; - - const bool is_compute = entry.type == ShaderType::Compute; - const u32 main_offset = is_compute ? 
KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; - auto registry = MakeRegistry(entry); - const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry); - - ProgramSharedPtr program; - if (precompiled_entry) { - // If the shader is precompiled, attempt to load it with - program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats); - if (!program) { - gl_cache_failed = true; - } - } - if (!program) { - // Otherwise compile it from GLSL - program = BuildShader(device, entry.type, uid, ir, *registry, true); - } - - PrecompiledShader shader; - shader.program = std::move(program); - shader.registry = std::move(registry); - shader.entries = MakeEntries(device, ir, entry.type); - - std::scoped_lock lock{mutex}; - if (callback) { - callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, - transferable->size()); - } - runtime_cache.emplace(entry.unique_identifier, std::move(shader)); - } - }; - - const std::size_t num_workers{std::max(1U, std::thread::hardware_concurrency())}; - const std::size_t bucket_size{transferable->size() / num_workers}; - std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); - std::vector<std::thread> threads(num_workers); - for (std::size_t i = 0; i < num_workers; ++i) { - const bool is_last_worker = i + 1 == num_workers; - const std::size_t start{bucket_size * i}; - const std::size_t end{is_last_worker ? transferable->size() : start + bucket_size}; - - // On some platforms the shared context has to be created from the GUI thread - contexts[i] = emu_window.CreateSharedContext(); - threads[i] = std::thread(worker, contexts[i].get(), start, end); +ComputePipeline* ShaderCache::CurrentComputePipeline() { + const VideoCommon::ShaderInfo* const shader{ComputeShader()}; + if (!shader) { + return nullptr; } - for (auto& thread : threads) { - thread.join(); + const auto& qmd{kepler_compute.launch_description}; + const ComputePipelineKey key{ + .unique_hash = shader->unique_hash, + .shared_memory_size = qmd.shared_alloc, + .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, + }; + const auto [pair, is_new]{compute_cache.try_emplace(key)}; + auto& pipeline{pair->second}; + if (!is_new) { + return pipeline.get(); } + pipeline = CreateComputePipeline(key, shader); + return pipeline.get(); +} - if (gl_cache_failed) { - // Invalidate the precompiled cache if a shader dumped shader was rejected - disk_cache.InvalidatePrecompiled(); - precompiled_cache_altered = true; - return; - } - if (stop_loading.stop_requested()) { - return; - } +std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline() { + GraphicsEnvironments environments; + GetGraphicsEnvironments(environments, graphics_key.unique_hashes); - if (device.UseAssemblyShaders() || device.UseDriverCache()) { - // Don't store precompiled binaries for assembly shaders or when using the driver cache - return; + main_pools.ReleaseContents(); + auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), + use_asynchronous_shaders)}; + if (!pipeline || shader_cache_filename.empty()) { + return pipeline; } - - // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw - // before precompiling them - - for (std::size_t i = 0; i < transferable->size(); ++i) { - const u64 id = (*transferable)[i].unique_identifier; - const auto it = find_precompiled(id); - if (it == gl_cache.end()) { - const GLuint program = runtime_cache.at(id).program->source_program.handle; - disk_cache.SavePrecompiled(id, program); - 
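
// Illustrative sketch (not part of this change): the work-bucketing scheme used by the
// removed ShaderCacheOpenGL::LoadDiskCache code above. The shader list is split into
// num_workers contiguous buckets, the last worker also takes the remainder, and every
// worker runs the same callable over its [begin, end) range. The per-worker shared GL
// context from the original code is omitted; 'RunBucketed' and 'worker' are hypothetical.
#include <algorithm>
#include <cstddef>
#include <thread>
#include <vector>

template <typename Worker>
void RunBucketed(std::size_t num_items, Worker worker) {
    const std::size_t num_workers =
        std::max<std::size_t>(1, std::thread::hardware_concurrency());
    const std::size_t bucket_size = num_items / num_workers;

    std::vector<std::thread> threads;
    threads.reserve(num_workers);
    for (std::size_t i = 0; i < num_workers; ++i) {
        const bool is_last = i + 1 == num_workers;
        const std::size_t begin = bucket_size * i;
        const std::size_t end = is_last ? num_items : begin + bucket_size;
        threads.emplace_back([=] { worker(begin, end); });
    }
    for (auto& thread : threads) {
        thread.join();
    }
}
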
precompiled_cache_altered = true; + boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram> env_ptrs; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (graphics_key.unique_hashes[index] != 0) { + env_ptrs.push_back(&environments.envs[index]); } } - - if (precompiled_cache_altered) { - disk_cache.SaveVirtualPrecompiledFile(); - } -} - -ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram( - const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, - const std::unordered_set<GLenum>& supported_formats) { - if (!supported_formats.contains(precompiled_entry.binary_format)) { - LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing"); - return {}; - } - - auto program = std::make_shared<ProgramHandle>(); - GLuint& handle = program->source_program.handle; - handle = glCreateProgram(); - glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE); - glProgramBinary(handle, precompiled_entry.binary_format, precompiled_entry.binary.data(), - static_cast<GLsizei>(precompiled_entry.binary.size())); - - GLint link_status; - glGetProgramiv(handle, GL_LINK_STATUS, &link_status); - if (link_status == GL_FALSE) { - LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing"); - return {}; - } - - return program; + SerializePipeline(graphics_key, env_ptrs, shader_cache_filename, CACHE_VERSION); + return pipeline; } -Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program, - VideoCommon::Shader::AsyncShaders& async_shaders) { - if (!maxwell3d.dirty.flags[Dirty::Shaders]) { - auto* last_shader = last_shaders[static_cast<std::size_t>(program)]; - if (last_shader->IsBuilt()) { - return last_shader; +std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( + ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key, + std::span<Shader::Environment* const> envs, bool build_in_parallel) try { + LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); + size_t env_index{}; + u32 total_storage_buffers{}; + std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; + const bool uses_vertex_a{key.unique_hashes[0] != 0}; + const bool uses_vertex_b{key.unique_hashes[1] != 0}; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (key.unique_hashes[index] == 0) { + continue; } - } + Shader::Environment& env{*envs[env_index]}; + ++env_index; - const GPUVAddr address{GetShaderAddress(maxwell3d, program)}; + const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))}; + Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); + if (!uses_vertex_a || index != 1) { + // Normal path + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); - if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) { - auto completed_work = async_shaders.GetCompletedWork(); - for (auto& work : completed_work) { - Shader* shader = TryGet(work.cpu_address); - gpu.ShaderNotify().MarkShaderComplete(); - if (shader == nullptr) { - continue; + for (const auto& desc : programs[index].info.storage_buffers_descriptors) { + total_storage_buffers += desc.count; } - using namespace VideoCommon::Shader; - if (work.backend == AsyncShaders::Backend::OpenGL) { - shader->AsyncOpenGLBuilt(std::move(work.program.opengl)); - } else if (work.backend == AsyncShaders::Backend::GLASM) { - shader->AsyncGLASMBuilt(std::move(work.program.glasm)); + } else { + // VertexB path 
when VertexA is present. + auto& program_va{programs[0]}; + auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; + for (const auto& desc : program_vb.info.storage_buffers_descriptors) { + total_storage_buffers += desc.count; } - - auto& registry = shader->GetRegistry(); - - ShaderDiskCacheEntry entry; - entry.type = work.shader_type; - entry.code = std::move(work.code); - entry.code_b = std::move(work.code_b); - entry.unique_identifier = work.uid; - entry.bound_buffer = registry.GetBoundBuffer(); - entry.graphics_info = registry.GetGraphicsInfo(); - entry.keys = registry.GetKeys(); - entry.bound_samplers = registry.GetBoundSamplers(); - entry.bindless_samplers = registry.GetBindlessSamplers(); - disk_cache.SaveEntry(std::move(entry)); + programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); } } - - // Look up shader in the cache based on address - const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(address)}; - if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) { - return last_shaders[static_cast<std::size_t>(program)] = shader; - } - - const u8* const host_ptr{gpu_memory.GetPointer(address)}; - - // No shader found - create a new one - ProgramCode code{GetShaderCode(gpu_memory, address, host_ptr, false)}; - ProgramCode code_b; - if (program == Maxwell::ShaderProgram::VertexA) { - const GPUVAddr address_b{GetShaderAddress(maxwell3d, Maxwell::ShaderProgram::VertexB)}; - const u8* host_ptr_b = gpu_memory.GetPointer(address_b); - code_b = GetShaderCode(gpu_memory, address_b, host_ptr_b, false); - } - const std::size_t code_size = code.size() * sizeof(u64); - - const u64 unique_identifier = GetUniqueIdentifier( - GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); - - const ShaderParameters params{gpu, maxwell3d, disk_cache, device, - *cpu_addr, host_ptr, unique_identifier}; - - std::unique_ptr<Shader> shader; - const auto found = runtime_cache.find(unique_identifier); - if (found == runtime_cache.end()) { - shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b), - async_shaders, cpu_addr.value_or(0)); - } else { - shader = Shader::CreateFromCache(params, found->second); - } - - Shader* const result = shader.get(); - if (cpu_addr) { - Register(std::move(shader), *cpu_addr, code_size); - } else { - null_shader = std::move(shader); + const u32 glasm_storage_buffer_limit{device.GetMaxGLASMStorageBufferBlocks()}; + const bool glasm_use_storage_buffers{total_storage_buffers <= glasm_storage_buffer_limit}; + + std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{}; + + OGLProgram source_program; + std::array<std::string, 5> sources; + std::array<std::vector<u32>, 5> sources_spirv; + Shader::Backend::Bindings binding; + Shader::IR::Program* previous_program{}; + const bool use_glasm{device.UseAssemblyShaders()}; + const size_t first_index = uses_vertex_a && uses_vertex_b ? 
1 : 0; + for (size_t index = first_index; index < Maxwell::MaxShaderProgram; ++index) { + if (key.unique_hashes[index] == 0) { + continue; + } + UNIMPLEMENTED_IF(index == 0); + + Shader::IR::Program& program{programs[index]}; + const size_t stage_index{index - 1}; + infos[stage_index] = &program.info; + + const auto runtime_info{ + MakeRuntimeInfo(key, program, previous_program, glasm_use_storage_buffers, use_glasm)}; + switch (device.GetShaderBackend()) { + case Settings::ShaderBackend::GLSL: + sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding); + break; + case Settings::ShaderBackend::GLASM: + sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding); + break; + case Settings::ShaderBackend::SPIRV: + sources_spirv[stage_index] = EmitSPIRV(profile, runtime_info, program, binding); + break; + } + previous_program = &program; } + auto* const thread_worker{build_in_parallel ? workers.get() : nullptr}; + return std::make_unique<GraphicsPipeline>( + device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, + thread_worker, &shader_notify, sources, sources_spirv, infos, key); - return last_shaders[static_cast<std::size_t>(program)] = result; +} catch (Shader::Exception& exception) { + LOG_ERROR(Render_OpenGL, "{}", exception.what()); + return nullptr; } -Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { - const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(code_addr)}; - - if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) { - return kernel; - } - - // No kernel found, create a new one - const u8* host_ptr{gpu_memory.GetPointer(code_addr)}; - ProgramCode code{GetShaderCode(gpu_memory, code_addr, host_ptr, true)}; - const std::size_t code_size{code.size() * sizeof(u64)}; - const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; - - const ShaderParameters params{gpu, kepler_compute, disk_cache, device, - *cpu_addr, host_ptr, unique_identifier}; +std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline( + const ComputePipelineKey& key, const VideoCommon::ShaderInfo* shader) { + const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; + const auto& qmd{kepler_compute.launch_description}; + ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; + env.SetCachedSize(shader->size_bytes); + + main_pools.ReleaseContents(); + auto pipeline{CreateComputePipeline(main_pools, key, env)}; + if (!pipeline || shader_cache_filename.empty()) { + return pipeline; + } + SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env}, shader_cache_filename, + CACHE_VERSION); + return pipeline; +} - std::unique_ptr<Shader> kernel; - const auto found = runtime_cache.find(unique_identifier); - if (found == runtime_cache.end()) { - kernel = Shader::CreateKernelFromMemory(params, std::move(code)); - } else { - kernel = Shader::CreateFromCache(params, found->second); - } +std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline( + ShaderContext::ShaderPools& pools, const ComputePipelineKey& key, + Shader::Environment& env) try { + LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); + + Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; + auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; + + u32 num_storage_buffers{}; + for (const auto& desc : program.info.storage_buffers_descriptors) { + num_storage_buffers += desc.count; + } + Shader::RuntimeInfo info; 
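
// Illustrative sketch (not part of this change): the function-try-block pattern that the
// new CreateGraphicsPipeline and CreateComputePipeline overloads above and below rely on.
// The whole function body is wrapped so an exception thrown during translation or code
// emission is caught and turned into a null pipeline instead of propagating; callers
// treat nullptr as "skip this draw/dispatch". TranslationError and BuildStep are
// hypothetical placeholders, not yuzu symbols.
#include <memory>
#include <stdexcept>

struct Pipeline {};

struct TranslationError : std::runtime_error {
    using std::runtime_error::runtime_error;
};

std::unique_ptr<Pipeline> BuildStep(bool fail) {
    if (fail) {
        throw TranslationError("unimplemented instruction");
    }
    return std::make_unique<Pipeline>();
}

std::unique_ptr<Pipeline> CreatePipeline(bool fail) try {
    return BuildStep(fail);
} catch (const TranslationError&) {
    // Log the error here in a real implementation, then fall back to a null pipeline.
    return nullptr;
}
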
+ info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); + + std::string code{}; + std::vector<u32> code_spirv; + switch (device.GetShaderBackend()) { + case Settings::ShaderBackend::GLSL: + code = EmitGLSL(profile, program); + break; + case Settings::ShaderBackend::GLASM: + code = EmitGLASM(profile, info, program); + break; + case Settings::ShaderBackend::SPIRV: + code_spirv = EmitSPIRV(profile, program); + break; + } + + return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, gpu_memory, + kepler_compute, program_manager, program.info, code, + code_spirv); +} catch (Shader::Exception& exception) { + LOG_ERROR(Render_OpenGL, "{}", exception.what()); + return nullptr; +} - Shader* const result = kernel.get(); - if (cpu_addr) { - Register(std::move(kernel), *cpu_addr, code_size); - } else { - null_kernel = std::move(kernel); - } - return result; +std::unique_ptr<ShaderWorker> ShaderCache::CreateWorkers() const { + return std::make_unique<ShaderWorker>(std::max(std::thread::hardware_concurrency(), 2U) - 1, + "yuzu:ShaderBuilder", + [this] { return Context{emu_window}; }); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index b30308b6f..a34110b37 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -5,157 +5,93 @@ #pragma once #include <array> -#include <atomic> -#include <bitset> -#include <memory> -#include <string> -#include <tuple> +#include <filesystem> +#include <stop_token> #include <unordered_map> -#include <unordered_set> -#include <vector> #include <glad/glad.h> #include "common/common_types.h" -#include "video_core/engines/shader_type.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/renderer_opengl/gl_shader_disk_cache.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" +#include "common/thread_worker.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/host_translate_info.h" +#include "shader_recompiler/object_pool.h" +#include "shader_recompiler/profile.h" +#include "video_core/renderer_opengl/gl_compute_pipeline.h" +#include "video_core/renderer_opengl/gl_graphics_pipeline.h" +#include "video_core/renderer_opengl/gl_shader_context.h" #include "video_core/shader_cache.h" namespace Tegra { class MemoryManager; } -namespace Core::Frontend { -class EmuWindow; -} - -namespace VideoCommon::Shader { -class AsyncShaders; -} - namespace OpenGL { class Device; +class ProgramManager; class RasterizerOpenGL; +using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>; -using Maxwell = Tegra::Engines::Maxwell3D::Regs; - -struct ProgramHandle { - OGLProgram source_program; - OGLAssemblyProgram assembly_program; -}; -using ProgramSharedPtr = std::shared_ptr<ProgramHandle>; - -struct PrecompiledShader { - ProgramSharedPtr program; - std::shared_ptr<VideoCommon::Shader::Registry> registry; - ShaderEntries entries; -}; - -struct ShaderParameters { - Tegra::GPU& gpu; - Tegra::Engines::ConstBufferEngineInterface& engine; - ShaderDiskCacheOpenGL& disk_cache; - const Device& device; - VAddr cpu_addr; - const u8* host_ptr; - u64 unique_identifier; -}; - -ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type, - u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir, - 
const VideoCommon::Shader::Registry& registry, - bool hint_retrievable = false); - -class Shader final { +class ShaderCache : public VideoCommon::ShaderCache { public: - ~Shader(); - - /// Gets the GL program handle for the shader - GLuint GetHandle() const; - - bool IsBuilt() const; - - /// Gets the shader entries for the shader - const ShaderEntries& GetEntries() const { - return entries; - } - - const VideoCommon::Shader::Registry& GetRegistry() const { - return *registry; - } - - /// Mark a OpenGL shader as built - void AsyncOpenGLBuilt(OGLProgram new_program); + explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, const Device& device_, + TextureCache& texture_cache_, BufferCache& buffer_cache_, + ProgramManager& program_manager_, StateTracker& state_tracker_, + VideoCore::ShaderNotify& shader_notify_); + ~ShaderCache(); - /// Mark a GLASM shader as built - void AsyncGLASMBuilt(OGLAssemblyProgram new_program); + void LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback); - static std::unique_ptr<Shader> CreateStageFromMemory( - const ShaderParameters& params, Maxwell::ShaderProgram program_type, - ProgramCode program_code, ProgramCode program_code_b, - VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr); + [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); - static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params, - ProgramCode code); - - static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params, - const PrecompiledShader& precompiled_shader); + [[nodiscard]] ComputePipeline* CurrentComputePipeline(); private: - explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries, - ProgramSharedPtr program, bool is_built_ = true); - - std::shared_ptr<VideoCommon::Shader::Registry> registry; - ShaderEntries entries; - ProgramSharedPtr program; - GLuint handle = 0; - bool is_built{}; -}; + GraphicsPipeline* CurrentGraphicsPipelineSlowPath(); -class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> { -public: - explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, - Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const Device& device_); - ~ShaderCacheOpenGL() override; + [[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept; - /// Loads disk cache for the current game - void LoadDiskCache(u64 title_id, std::stop_token stop_loading, - const VideoCore::DiskResourceLoadCallback& callback); + std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(); - /// Gets the current specified shader stage program - Shader* GetStageProgram(Maxwell::ShaderProgram program, - VideoCommon::Shader::AsyncShaders& async_shaders); + std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline( + ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key, + std::span<Shader::Environment* const> envs, bool build_in_parallel); - /// Gets a compute kernel in the passed address - Shader* GetComputeKernel(GPUVAddr code_addr); + std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineKey& key, + const VideoCommon::ShaderInfo* shader); -private: - ProgramSharedPtr 
GeneratePrecompiledProgram( - const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, - const std::unordered_set<GLenum>& supported_formats); + std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderContext::ShaderPools& pools, + const ComputePipelineKey& key, + Shader::Environment& env); + + std::unique_ptr<ShaderWorker> CreateWorkers() const; Core::Frontend::EmuWindow& emu_window; - Tegra::GPU& gpu; - Tegra::MemoryManager& gpu_memory; - Tegra::Engines::Maxwell3D& maxwell3d; - Tegra::Engines::KeplerCompute& kepler_compute; const Device& device; + TextureCache& texture_cache; + BufferCache& buffer_cache; + ProgramManager& program_manager; + StateTracker& state_tracker; + VideoCore::ShaderNotify& shader_notify; + const bool use_asynchronous_shaders; + + GraphicsPipelineKey graphics_key{}; + GraphicsPipeline* current_pipeline{}; - ShaderDiskCacheOpenGL disk_cache; - std::unordered_map<u64, PrecompiledShader> runtime_cache; + ShaderContext::ShaderPools main_pools; + std::unordered_map<GraphicsPipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_cache; + std::unordered_map<ComputePipelineKey, std::unique_ptr<ComputePipeline>> compute_cache; - std::unique_ptr<Shader> null_shader; - std::unique_ptr<Shader> null_kernel; + Shader::Profile profile; + Shader::HostTranslateInfo host_info; - std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{}; + std::filesystem::path shader_cache_filename; + std::unique_ptr<ShaderWorker> workers; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_context.h b/src/video_core/renderer_opengl/gl_shader_context.h new file mode 100644 index 000000000..6ff34e5d6 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_context.h @@ -0,0 +1,33 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "core/frontend/emu_window.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" + +namespace OpenGL::ShaderContext { +struct ShaderPools { + void ReleaseContents() { + flow_block.ReleaseContents(); + block.ReleaseContents(); + inst.ReleaseContents(); + } + + Shader::ObjectPool<Shader::IR::Inst> inst; + Shader::ObjectPool<Shader::IR::Block> block; + Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block; +}; + +struct Context { + explicit Context(Core::Frontend::EmuWindow& emu_window) + : gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {} + + std::unique_ptr<Core::Frontend::GraphicsContext> gl_context; + Core::Frontend::GraphicsContext::Scoped scoped; + ShaderPools pools; +}; + +} // namespace OpenGL::ShaderContext diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp deleted file mode 100644 index 9c28498e8..000000000 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ /dev/null @@ -1,2986 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
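
// Illustrative sketch (not part of this change): the "stateful worker" idea behind
// Common::StatefulThreadWorker<ShaderContext::Context> and ShaderCache::CreateWorkers()
// above. Each thread builds its own context from a factory (in yuzu, a shared GL context
// plus the shader object pools) and then drains a queue of tasks that receive that
// per-thread state. This condensed, single-queue version is illustrative only.
#include <condition_variable>
#include <functional>
#include <mutex>
#include <queue>
#include <thread>
#include <vector>

template <typename State>
class StatefulWorkerPool {
public:
    using Task = std::function<void(State&)>;

    template <typename StateFactory>
    StatefulWorkerPool(std::size_t num_threads, StateFactory make_state) {
        for (std::size_t i = 0; i < num_threads; ++i) {
            threads.emplace_back([this, make_state] {
                State state{make_state()}; // per-thread state, built on the worker thread
                Loop(state);
            });
        }
    }

    ~StatefulWorkerPool() {
        {
            std::scoped_lock lock{mutex};
            stop = true;
        }
        condition.notify_all();
        for (auto& thread : threads) {
            thread.join();
        }
    }

    void Push(Task task) {
        {
            std::scoped_lock lock{mutex};
            tasks.push(std::move(task));
        }
        condition.notify_one();
    }

private:
    void Loop(State& state) {
        while (true) {
            Task task;
            {
                std::unique_lock lock{mutex};
                condition.wait(lock, [this] { return stop || !tasks.empty(); });
                if (stop && tasks.empty()) {
                    return;
                }
                task = std::move(tasks.front());
                tasks.pop();
            }
            task(state);
        }
    }

    std::vector<std::thread> threads;
    std::queue<Task> tasks;
    std::mutex mutex;
    std::condition_variable condition;
    bool stop = false;
};
// In the diff above, the factory is '[this] { return Context{emu_window}; }' and the
// per-thread state carries a shared GraphicsContext plus the ShaderPools.
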
- -#include <array> -#include <string> -#include <string_view> -#include <utility> -#include <variant> -#include <vector> - -#include <fmt/format.h> - -#include "common/alignment.h" -#include "common/assert.h" -#include "common/common_types.h" -#include "common/div_ceil.h" -#include "common/logging/log.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/renderer_opengl/gl_device.h" -#include "video_core/renderer_opengl/gl_rasterizer.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/node.h" -#include "video_core/shader/shader_ir.h" -#include "video_core/shader/transform_feedback.h" - -namespace OpenGL { - -namespace { - -using Tegra::Engines::ShaderType; -using Tegra::Shader::Attribute; -using Tegra::Shader::Header; -using Tegra::Shader::IpaInterpMode; -using Tegra::Shader::IpaMode; -using Tegra::Shader::IpaSampleMode; -using Tegra::Shader::PixelImap; -using Tegra::Shader::Register; -using Tegra::Shader::TextureType; - -using namespace VideoCommon::Shader; -using namespace std::string_literals; - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using Operation = const OperationNode&; - -class ASTDecompiler; -class ExprDecompiler; - -enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; - -constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"}; - -constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr"; -constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr"; - -struct TextureOffset {}; -struct TextureDerivates {}; -using TextureArgument = std::pair<Type, Node>; -using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>; - -constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32); -constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32); - -constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt -#define ftou floatBitsToUint -#define itof intBitsToFloat -#define utof uintBitsToFloat - -bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{ - bvec2 is_nan1 = isnan(pair1); - bvec2 is_nan2 = isnan(pair2); - return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); -}} - -const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f ); -const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); -)"; - -class ShaderWriter final { -public: - void AddExpression(std::string_view text) { - DEBUG_ASSERT(scope >= 0); - if (!text.empty()) { - AppendIndentation(); - } - shader_source += text; - } - - // Forwards all arguments directly to libfmt. - // Note that all formatting requirements for fmt must be - // obeyed when using this function. (e.g. {{ must be used - // printing the character '{' is desirable. Ditto for }} and '}', - // etc). - template <typename... Args> - void AddLine(std::string_view text, Args&&... 
args) { - AddExpression(fmt::format(fmt::runtime(text), std::forward<Args>(args)...)); - AddNewLine(); - } - - void AddNewLine() { - DEBUG_ASSERT(scope >= 0); - shader_source += '\n'; - } - - std::string GenerateTemporary() { - return fmt::format("tmp{}", temporary_index++); - } - - std::string GetResult() { - return std::move(shader_source); - } - - s32 scope = 0; - -private: - void AppendIndentation() { - shader_source.append(static_cast<std::size_t>(scope) * 4, ' '); - } - - std::string shader_source; - u32 temporary_index = 1; -}; - -class Expression final { -public: - Expression(std::string code_, Type type_) : code{std::move(code_)}, type{type_} { - ASSERT(type != Type::Void); - } - Expression() : type{Type::Void} {} - - Type GetType() const { - return type; - } - - std::string GetCode() const { - return code; - } - - void CheckVoid() const { - ASSERT(type == Type::Void); - } - - std::string As(Type type_) const { - switch (type_) { - case Type::Bool: - return AsBool(); - case Type::Bool2: - return AsBool2(); - case Type::Float: - return AsFloat(); - case Type::Int: - return AsInt(); - case Type::Uint: - return AsUint(); - case Type::HalfFloat: - return AsHalfFloat(); - default: - UNREACHABLE_MSG("Invalid type"); - return code; - } - } - - std::string AsBool() const { - switch (type) { - case Type::Bool: - return code; - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsBool2() const { - switch (type) { - case Type::Bool2: - return code; - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsFloat() const { - switch (type) { - case Type::Float: - return code; - case Type::Uint: - return fmt::format("utof({})", code); - case Type::Int: - return fmt::format("itof({})", code); - case Type::HalfFloat: - return fmt::format("utof(packHalf2x16({}))", code); - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsInt() const { - switch (type) { - case Type::Float: - return fmt::format("ftoi({})", code); - case Type::Uint: - return fmt::format("int({})", code); - case Type::Int: - return code; - case Type::HalfFloat: - return fmt::format("int(packHalf2x16({}))", code); - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsUint() const { - switch (type) { - case Type::Float: - return fmt::format("ftou({})", code); - case Type::Uint: - return code; - case Type::Int: - return fmt::format("uint({})", code); - case Type::HalfFloat: - return fmt::format("packHalf2x16({})", code); - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsHalfFloat() const { - switch (type) { - case Type::Float: - return fmt::format("unpackHalf2x16(ftou({}))", code); - case Type::Uint: - return fmt::format("unpackHalf2x16({})", code); - case Type::Int: - return fmt::format("unpackHalf2x16(int({}))", code); - case Type::HalfFloat: - return code; - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - -private: - std::string code; - Type type{}; -}; - -const char* GetTypeString(Type type) { - switch (type) { - case Type::Bool: - return "bool"; - case Type::Bool2: - return "bvec2"; - case Type::Float: - return "float"; - case Type::Int: - return "int"; - case Type::Uint: - return "uint"; - case Type::HalfFloat: - return "vec2"; - default: - UNREACHABLE_MSG("Invalid type"); - return "<invalid type>"; - } -} - -const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) { - switch (image_type) { - case 
Tegra::Shader::ImageType::Texture1D: - return "1D"; - case Tegra::Shader::ImageType::TextureBuffer: - return "Buffer"; - case Tegra::Shader::ImageType::Texture1DArray: - return "1DArray"; - case Tegra::Shader::ImageType::Texture2D: - return "2D"; - case Tegra::Shader::ImageType::Texture2DArray: - return "2DArray"; - case Tegra::Shader::ImageType::Texture3D: - return "3D"; - default: - UNREACHABLE(); - return "1D"; - } -} - -/// Describes primitive behavior on geometry shaders -std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) { - switch (topology) { - case Maxwell::PrimitiveTopology::Points: - return {"points", 1}; - case Maxwell::PrimitiveTopology::Lines: - case Maxwell::PrimitiveTopology::LineStrip: - return {"lines", 2}; - case Maxwell::PrimitiveTopology::LinesAdjacency: - case Maxwell::PrimitiveTopology::LineStripAdjacency: - return {"lines_adjacency", 4}; - case Maxwell::PrimitiveTopology::Triangles: - case Maxwell::PrimitiveTopology::TriangleStrip: - case Maxwell::PrimitiveTopology::TriangleFan: - return {"triangles", 3}; - case Maxwell::PrimitiveTopology::TrianglesAdjacency: - case Maxwell::PrimitiveTopology::TriangleStripAdjacency: - return {"triangles_adjacency", 6}; - default: - UNIMPLEMENTED_MSG("topology={}", topology); - return {"points", 1}; - } -} - -/// Generates code to use for a swizzle operation. -constexpr const char* GetSwizzle(std::size_t element) { - constexpr std::array swizzle = {".x", ".y", ".z", ".w"}; - return swizzle.at(element); -} - -constexpr const char* GetColorSwizzle(std::size_t element) { - constexpr std::array swizzle = {".r", ".g", ".b", ".a"}; - return swizzle.at(element); -} - -/// Translate topology -std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { - switch (topology) { - case Tegra::Shader::OutputTopology::PointList: - return "points"; - case Tegra::Shader::OutputTopology::LineStrip: - return "line_strip"; - case Tegra::Shader::OutputTopology::TriangleStrip: - return "triangle_strip"; - default: - UNIMPLEMENTED_MSG("Unknown output topology: {}", topology); - return "points"; - } -} - -/// Returns true if an object has to be treated as precise -bool IsPrecise(Operation operand) { - const auto& meta{operand.GetMeta()}; - if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) { - return arithmetic->precise; - } - return false; -} - -bool IsPrecise(const Node& node) { - if (const auto operation = std::get_if<OperationNode>(&*node)) { - return IsPrecise(*operation); - } - return false; -} - -constexpr bool IsGenericAttribute(Attribute::Index index) { - return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; -} - -constexpr bool IsLegacyTexCoord(Attribute::Index index) { - return static_cast<int>(index) >= static_cast<int>(Attribute::Index::TexCoord_0) && - static_cast<int>(index) <= static_cast<int>(Attribute::Index::TexCoord_7); -} - -constexpr Attribute::Index ToGenericAttribute(u64 value) { - return static_cast<Attribute::Index>(value + static_cast<u64>(Attribute::Index::Attribute_0)); -} - -constexpr int GetLegacyTexCoordIndex(Attribute::Index index) { - return static_cast<int>(index) - static_cast<int>(Attribute::Index::TexCoord_0); -} - -u32 GetGenericAttributeIndex(Attribute::Index index) { - ASSERT(IsGenericAttribute(index)); - return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); -} - -constexpr const char* GetFlowStackPrefix(MetaStackClass stack) { - switch (stack) { - case MetaStackClass::Ssy: - return "ssy"; - 
case MetaStackClass::Pbk: - return "pbk"; - } - return {}; -} - -std::string FlowStackName(MetaStackClass stack) { - return fmt::format("{}_flow_stack", GetFlowStackPrefix(stack)); -} - -std::string FlowStackTopName(MetaStackClass stack) { - return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); -} - -struct GenericVaryingDescription { - std::string name; - u8 first_element = 0; - bool is_scalar = false; -}; - -class GLSLDecompiler final { -public: - explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, - ShaderType stage_, std::string_view identifier_, - std::string_view suffix_) - : device{device_}, ir{ir_}, registry{registry_}, stage{stage_}, - identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} { - if (stage != ShaderType::Compute) { - transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); - } - } - - void Decompile() { - DeclareHeader(); - DeclareVertex(); - DeclareGeometry(); - DeclareFragment(); - DeclareCompute(); - DeclareInputAttributes(); - DeclareOutputAttributes(); - DeclareImages(); - DeclareSamplers(); - DeclareGlobalMemory(); - DeclareConstantBuffers(); - DeclareLocalMemory(); - DeclareRegisters(); - DeclarePredicates(); - DeclareInternalFlags(); - DeclareCustomVariables(); - DeclarePhysicalAttributeReader(); - - code.AddLine("void main() {{"); - ++code.scope; - - if (stage == ShaderType::Vertex) { - code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"); - } - - if (ir.IsDecompiled()) { - DecompileAST(); - } else { - DecompileBranchMode(); - } - - --code.scope; - code.AddLine("}}"); - } - - std::string GetResult() { - return code.GetResult(); - } - -private: - friend class ASTDecompiler; - friend class ExprDecompiler; - - void DecompileBranchMode() { - // VM's program counter - const auto first_address = ir.GetBasicBlocks().begin()->first; - code.AddLine("uint jmp_to = {}U;", first_address); - - // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems - // unlikely that shaders will use 20 nested SSYs and PBKs. - constexpr u32 FLOW_STACK_SIZE = 20; - if (!ir.IsFlowStackDisabled()) { - for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { - code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); - code.AddLine("uint {} = 0U;", FlowStackTopName(stack)); - } - } - - code.AddLine("while (true) {{"); - ++code.scope; - - code.AddLine("switch (jmp_to) {{"); - - for (const auto& pair : ir.GetBasicBlocks()) { - const auto& [address, bb] = pair; - code.AddLine("case 0x{:X}U: {{", address); - ++code.scope; - - VisitBlock(bb); - - --code.scope; - code.AddLine("}}"); - } - - code.AddLine("default: return;"); - code.AddLine("}}"); - - --code.scope; - code.AddLine("}}"); - } - - void DecompileAST(); - - void DeclareHeader() { - if (!identifier.empty()) { - code.AddLine("// {}", identifier); - } - const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate(); - code.AddLine("#version 440 {}", use_compatibility ? 
"compatibility" : "core"); - code.AddLine("#extension GL_ARB_separate_shader_objects : enable"); - if (device.HasShaderBallot()) { - code.AddLine("#extension GL_ARB_shader_ballot : require"); - } - if (device.HasVertexViewportLayer()) { - code.AddLine("#extension GL_ARB_shader_viewport_layer_array : require"); - } - if (device.HasImageLoadFormatted()) { - code.AddLine("#extension GL_EXT_shader_image_load_formatted : require"); - } - if (device.HasTextureShadowLod()) { - code.AddLine("#extension GL_EXT_texture_shadow_lod : require"); - } - if (device.HasWarpIntrinsics()) { - code.AddLine("#extension GL_NV_gpu_shader5 : require"); - code.AddLine("#extension GL_NV_shader_thread_group : require"); - code.AddLine("#extension GL_NV_shader_thread_shuffle : require"); - } - // This pragma stops Nvidia's driver from over optimizing math (probably using fp16 - // operations) on places where we don't want to. - // Thanks to Ryujinx for finding this workaround. - code.AddLine("#pragma optionNV(fastmath off)"); - - code.AddNewLine(); - - code.AddLine(COMMON_DECLARATIONS); - } - - void DeclareVertex() { - if (stage != ShaderType::Vertex) { - return; - } - - DeclareVertexRedeclarations(); - } - - void DeclareGeometry() { - if (stage != ShaderType::Geometry) { - return; - } - - const auto& info = registry.GetGraphicsInfo(); - const auto input_topology = info.primitive_topology; - const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology); - max_input_vertices = max_vertices; - code.AddLine("layout ({}) in;", glsl_topology); - - const auto topology = GetTopologyName(header.common3.output_topology); - const auto max_output_vertices = header.common4.max_output_vertices.Value(); - code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices); - code.AddNewLine(); - - code.AddLine("in gl_PerVertex {{"); - ++code.scope; - code.AddLine("vec4 gl_Position;"); - --code.scope; - code.AddLine("}} gl_in[];"); - - DeclareVertexRedeclarations(); - } - - void DeclareFragment() { - if (stage != ShaderType::Fragment) { - return; - } - if (ir.UsesLegacyVaryings()) { - code.AddLine("in gl_PerFragment {{"); - ++code.scope; - code.AddLine("vec4 gl_TexCoord[8];"); - code.AddLine("vec4 gl_Color;"); - code.AddLine("vec4 gl_SecondaryColor;"); - --code.scope; - code.AddLine("}};"); - } - - for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { - code.AddLine("layout (location = {}) out vec4 frag_color{};", rt, rt); - } - } - - void DeclareCompute() { - if (stage != ShaderType::Compute) { - return; - } - const auto& info = registry.GetComputeInfo(); - if (u32 size = info.shared_memory_size_in_words * 4; size > 0) { - const u32 limit = device.GetMaxComputeSharedMemorySize(); - if (size > limit) { - LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", - size, limit); - size = limit; - } - - code.AddLine("shared uint smem[{}];", size / 4); - code.AddNewLine(); - } - code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;", - info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]); - code.AddNewLine(); - } - - void DeclareVertexRedeclarations() { - code.AddLine("out gl_PerVertex {{"); - ++code.scope; - - auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position); - if (!pos_xfb.empty()) { - pos_xfb = fmt::format("layout ({}) ", pos_xfb); - } - const char* pos_type = - FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1); - code.AddLine("{}{} gl_Position;", pos_xfb, pos_type); - - for 
(const auto attribute : ir.GetOutputAttributes()) { - if (attribute == Attribute::Index::ClipDistances0123 || - attribute == Attribute::Index::ClipDistances4567) { - code.AddLine("float gl_ClipDistance[];"); - break; - } - } - - if (stage != ShaderType::Geometry && - (stage != ShaderType::Vertex || device.HasVertexViewportLayer())) { - if (ir.UsesLayer()) { - code.AddLine("int gl_Layer;"); - } - if (ir.UsesViewportIndex()) { - code.AddLine("int gl_ViewportIndex;"); - } - } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderType::Vertex && - !device.HasVertexViewportLayer()) { - LOG_ERROR( - Render_OpenGL, - "GL_ARB_shader_viewport_layer_array is not available and its required by a shader"); - } - - if (ir.UsesPointSize()) { - code.AddLine("float gl_PointSize;"); - } - - if (ir.UsesLegacyVaryings()) { - code.AddLine("vec4 gl_TexCoord[8];"); - code.AddLine("vec4 gl_FrontColor;"); - code.AddLine("vec4 gl_FrontSecondaryColor;"); - code.AddLine("vec4 gl_BackColor;"); - code.AddLine("vec4 gl_BackSecondaryColor;"); - } - - --code.scope; - code.AddLine("}};"); - code.AddNewLine(); - - if (stage == ShaderType::Geometry) { - if (ir.UsesLayer()) { - code.AddLine("out int gl_Layer;"); - } - if (ir.UsesViewportIndex()) { - code.AddLine("out int gl_ViewportIndex;"); - } - } - code.AddNewLine(); - } - - void DeclareRegisters() { - const auto& registers = ir.GetRegisters(); - for (const u32 gpr : registers) { - code.AddLine("float {} = 0.0f;", GetRegister(gpr)); - } - if (!registers.empty()) { - code.AddNewLine(); - } - } - - void DeclareCustomVariables() { - const u32 num_custom_variables = ir.GetNumCustomVariables(); - for (u32 i = 0; i < num_custom_variables; ++i) { - code.AddLine("float {} = 0.0f;", GetCustomVariable(i)); - } - if (num_custom_variables > 0) { - code.AddNewLine(); - } - } - - void DeclarePredicates() { - const auto& predicates = ir.GetPredicates(); - for (const auto pred : predicates) { - code.AddLine("bool {} = false;", GetPredicate(pred)); - } - if (!predicates.empty()) { - code.AddNewLine(); - } - } - - void DeclareLocalMemory() { - u64 local_memory_size = 0; - if (stage == ShaderType::Compute) { - local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; - } else { - local_memory_size = header.GetLocalMemorySize(); - } - if (local_memory_size == 0) { - return; - } - const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4; - code.AddLine("uint {}[{}];", GetLocalMemory(), element_count); - code.AddNewLine(); - } - - void DeclareInternalFlags() { - for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) { - const auto flag_code = static_cast<InternalFlag>(flag); - code.AddLine("bool {} = false;", GetInternalFlag(flag_code)); - } - code.AddNewLine(); - } - - const char* GetInputFlags(PixelImap attribute) { - switch (attribute) { - case PixelImap::Perspective: - return "smooth"; - case PixelImap::Constant: - return "flat"; - case PixelImap::ScreenLinear: - return "noperspective"; - case PixelImap::Unused: - break; - } - UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute); - return {}; - } - - void DeclareInputAttributes() { - if (ir.HasPhysicalAttributes()) { - const u32 num_inputs{GetNumPhysicalInputAttributes()}; - for (u32 i = 0; i < num_inputs; ++i) { - DeclareInputAttribute(ToGenericAttribute(i), true); - } - code.AddNewLine(); - return; - } - - const auto& attributes = ir.GetInputAttributes(); - for (const auto index : attributes) { - if (IsGenericAttribute(index)) { - DeclareInputAttribute(index, 
false); - } - } - if (!attributes.empty()) { - code.AddNewLine(); - } - } - - void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { - const u32 location{GetGenericAttributeIndex(index)}; - - std::string name{GetGenericInputAttribute(index)}; - if (stage == ShaderType::Geometry) { - name = "gs_" + name + "[]"; - } - - std::string suffix_; - if (stage == ShaderType::Fragment) { - const auto input_mode{header.ps.GetPixelImap(location)}; - if (input_mode == PixelImap::Unused) { - return; - } - suffix_ = GetInputFlags(input_mode); - } - - code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix_, name); - } - - void DeclareOutputAttributes() { - if (ir.HasPhysicalAttributes() && stage != ShaderType::Fragment) { - for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { - DeclareOutputAttribute(ToGenericAttribute(i)); - } - code.AddNewLine(); - return; - } - - const auto& attributes = ir.GetOutputAttributes(); - for (const auto index : attributes) { - if (IsGenericAttribute(index)) { - DeclareOutputAttribute(index); - } - } - if (!attributes.empty()) { - code.AddNewLine(); - } - } - - std::optional<std::size_t> GetNumComponents(Attribute::Index index, u8 element = 0) const { - const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); - const auto it = transform_feedback.find(location); - if (it == transform_feedback.end()) { - return std::nullopt; - } - return it->second.components; - } - - std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const { - const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); - const auto it = transform_feedback.find(location); - if (it == transform_feedback.end()) { - return {}; - } - - const VaryingTFB& tfb = it->second; - return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer, - tfb.offset, tfb.stride); - } - - void DeclareOutputAttribute(Attribute::Index index) { - static constexpr std::string_view swizzle = "xyzw"; - u8 element = 0; - while (element < 4) { - auto xfb = GetTransformFeedbackDecoration(index, element); - if (!xfb.empty()) { - xfb = fmt::format(", {}", xfb); - } - const std::size_t remainder = 4 - element; - const std::size_t num_components = GetNumComponents(index, element).value_or(remainder); - const char* const type = FLOAT_TYPES.at(num_components - 1); - - const u32 location = GetGenericAttributeIndex(index); - - GenericVaryingDescription description; - description.first_element = static_cast<u8>(element); - description.is_scalar = num_components == 1; - description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME); - if (element != 0 || num_components != 4) { - const std::string_view name_swizzle = swizzle.substr(element, num_components); - description.name = fmt::format("{}_{}", description.name, name_swizzle); - } - for (std::size_t i = 0; i < num_components; ++i) { - const u8 offset = static_cast<u8>(location * 4 + element + i); - varying_description.insert({offset, description}); - } - - code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element, - xfb, type, description.name); - - element = static_cast<u8>(static_cast<std::size_t>(element) + num_components); - } - } - - void DeclareConstantBuffers() { - u32 binding = device.GetBaseBindings(stage).uniform_buffer; - for (const auto& [index, info] : ir.GetConstantBuffers()) { - const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32)); - const u32 size = info.IsIndirect() ? 
MAX_CONSTBUFFER_ELEMENTS : num_elements; - code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++, - GetConstBufferBlock(index)); - code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), size); - code.AddLine("}};"); - code.AddNewLine(); - } - } - - void DeclareGlobalMemory() { - u32 binding = device.GetBaseBindings(stage).shader_storage_buffer; - for (const auto& [base, usage] : ir.GetGlobalMemory()) { - // Since we don't know how the shader will use the shader, hint the driver to disable as - // much optimizations as possible - std::string qualifier = "coherent volatile"; - if (usage.is_read && !usage.is_written) { - qualifier += " readonly"; - } else if (usage.is_written && !usage.is_read) { - qualifier += " writeonly"; - } - - code.AddLine("layout (std430, binding = {}) {} buffer {} {{", binding++, qualifier, - GetGlobalMemoryBlock(base)); - code.AddLine(" uint {}[];", GetGlobalMemory(base)); - code.AddLine("}};"); - code.AddNewLine(); - } - } - - void DeclareSamplers() { - u32 binding = device.GetBaseBindings(stage).sampler; - for (const auto& sampler : ir.GetSamplers()) { - const std::string name = GetSampler(sampler); - const std::string description = fmt::format("layout (binding = {}) uniform", binding); - binding += sampler.is_indexed ? sampler.size : 1; - - std::string sampler_type = [&]() { - if (sampler.is_buffer) { - return "samplerBuffer"; - } - switch (sampler.type) { - case TextureType::Texture1D: - return "sampler1D"; - case TextureType::Texture2D: - return "sampler2D"; - case TextureType::Texture3D: - return "sampler3D"; - case TextureType::TextureCube: - return "samplerCube"; - default: - UNREACHABLE(); - return "sampler2D"; - } - }(); - if (sampler.is_array) { - sampler_type += "Array"; - } - if (sampler.is_shadow) { - sampler_type += "Shadow"; - } - - if (!sampler.is_indexed) { - code.AddLine("{} {} {};", description, sampler_type, name); - } else { - code.AddLine("{} {} {}[{}];", description, sampler_type, name, sampler.size); - } - } - if (!ir.GetSamplers().empty()) { - code.AddNewLine(); - } - } - - void DeclarePhysicalAttributeReader() { - if (!ir.HasPhysicalAttributes()) { - return; - } - code.AddLine("float ReadPhysicalAttribute(uint physical_address) {{"); - ++code.scope; - code.AddLine("switch (physical_address) {{"); - - // Just declare generic attributes for now. - const auto num_attributes{static_cast<u32>(GetNumPhysicalInputAttributes())}; - for (u32 index = 0; index < num_attributes; ++index) { - const auto attribute{ToGenericAttribute(index)}; - for (u32 element = 0; element < 4; ++element) { - constexpr u32 generic_base = 0x80; - constexpr u32 generic_stride = 16; - constexpr u32 element_stride = 4; - const u32 address{generic_base + index * generic_stride + element * element_stride}; - - const bool declared = stage != ShaderType::Fragment || - header.ps.GetPixelImap(index) != PixelImap::Unused; - const std::string value = - declared ? 
ReadAttribute(attribute, element).AsFloat() : "0.0f"; - code.AddLine("case 0x{:X}U: return {};", address, value); - } - } - - code.AddLine("default: return 0;"); - - code.AddLine("}}"); - --code.scope; - code.AddLine("}}"); - code.AddNewLine(); - } - - void DeclareImages() { - u32 binding = device.GetBaseBindings(stage).image; - for (const auto& image : ir.GetImages()) { - std::string qualifier = "coherent volatile"; - if (image.is_read && !image.is_written) { - qualifier += " readonly"; - } else if (image.is_written && !image.is_read) { - qualifier += " writeonly"; - } - - const char* format = image.is_atomic ? "r32ui, " : ""; - const char* type_declaration = GetImageTypeDeclaration(image.type); - code.AddLine("layout ({}binding = {}) {} uniform uimage{} {};", format, binding++, - qualifier, type_declaration, GetImage(image)); - } - if (!ir.GetImages().empty()) { - code.AddNewLine(); - } - } - - void VisitBlock(const NodeBlock& bb) { - for (const auto& node : bb) { - Visit(node).CheckVoid(); - } - } - - Expression Visit(const Node& node) { - if (const auto operation = std::get_if<OperationNode>(&*node)) { - if (const auto amend_index = operation->GetAmendIndex()) { - Visit(ir.GetAmendNode(*amend_index)).CheckVoid(); - } - const auto operation_index = static_cast<std::size_t>(operation->GetCode()); - if (operation_index >= operation_decompilers.size()) { - UNREACHABLE_MSG("Out of bounds operation: {}", operation_index); - return {}; - } - const auto decompiler = operation_decompilers[operation_index]; - if (decompiler == nullptr) { - UNREACHABLE_MSG("Undefined operation: {}", operation_index); - return {}; - } - return (this->*decompiler)(*operation); - } - - if (const auto gpr = std::get_if<GprNode>(&*node)) { - const u32 index = gpr->GetIndex(); - if (index == Register::ZeroIndex) { - return {"0U", Type::Uint}; - } - return {GetRegister(index), Type::Float}; - } - - if (const auto cv = std::get_if<CustomVarNode>(&*node)) { - const u32 index = cv->GetIndex(); - return {GetCustomVariable(index), Type::Float}; - } - - if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { - const u32 value = immediate->GetValue(); - if (value < 10) { - // For eyecandy avoid using hex numbers on single digits - return {fmt::format("{}U", immediate->GetValue()), Type::Uint}; - } - return {fmt::format("0x{:X}U", immediate->GetValue()), Type::Uint}; - } - - if (const auto predicate = std::get_if<PredicateNode>(&*node)) { - const auto value = [&]() -> std::string { - switch (const auto index = predicate->GetIndex(); index) { - case Tegra::Shader::Pred::UnusedIndex: - return "true"; - case Tegra::Shader::Pred::NeverExecute: - return "false"; - default: - return GetPredicate(index); - } - }(); - if (predicate->IsNegated()) { - return {fmt::format("!({})", value), Type::Bool}; - } - return {value, Type::Bool}; - } - - if (const auto abuf = std::get_if<AbufNode>(&*node)) { - UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderType::Geometry, - "Physical attributes in geometry shaders are not implemented"); - if (abuf->IsPhysicalBuffer()) { - return {fmt::format("ReadPhysicalAttribute({})", - Visit(abuf->GetPhysicalAddress()).AsUint()), - Type::Float}; - } - return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer()); - } - - if (const auto cbuf = std::get_if<CbufNode>(&*node)) { - const Node offset = cbuf->GetOffset(); - - if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { - // Direct access - const u32 offset_imm = immediate->GetValue(); - 
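
// Illustrative sketch (not part of this change): the index math of the const-buffer
// direct-access case here. Constant buffers are declared as arrays of uvec4, so a byte
// offset splits into a vec4 index (offset / 16) and a component index ((offset / 4) % 4);
// the indirect case below does the equivalent on a word index with '>> 2' and '& 3'.
// CbufIndices is a hypothetical helper showing the same arithmetic in plain C++.
#include <cstdint>
#include <utility>

// Maps a 4-byte-aligned byte offset to a (uvec4 element, component) pair.
constexpr std::pair<std::uint32_t, std::uint32_t> CbufIndices(std::uint32_t byte_offset) {
    return {byte_offset / 16, (byte_offset / 4) % 4};
}

static_assert(CbufIndices(0).first == 0 && CbufIndices(0).second == 0);
static_assert(CbufIndices(20).first == 1 && CbufIndices(20).second == 1);
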
ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); - return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), - offset_imm / (4 * 4), (offset_imm / 4) % 4), - Type::Uint}; - } - - // Indirect access - const std::string final_offset = code.GenerateTemporary(); - code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint()); - - if (!device.HasComponentIndexingBug()) { - return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()), - final_offset, final_offset), - Type::Uint}; - } - - // AMD's proprietary GLSL compiler emits ill code for variable component access. - // To bypass this driver bug generate 4 ifs, one per each component. - const std::string pack = code.GenerateTemporary(); - code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()), - final_offset); - - const std::string result = code.GenerateTemporary(); - code.AddLine("uint {};", result); - for (u32 swizzle = 0; swizzle < 4; ++swizzle) { - code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, pack, - GetSwizzle(swizzle)); - } - return {result, Type::Uint}; - } - - if (const auto gmem = std::get_if<GmemNode>(&*node)) { - const std::string real = Visit(gmem->GetRealAddress()).AsUint(); - const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); - const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); - return {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), - Type::Uint}; - } - - if (const auto lmem = std::get_if<LmemNode>(&*node)) { - return { - fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), - Type::Uint}; - } - - if (const auto smem = std::get_if<SmemNode>(&*node)) { - return {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint}; - } - - if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { - return {GetInternalFlag(internal_flag->GetFlag()), Type::Bool}; - } - - if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { - if (const auto amend_index = conditional->GetAmendIndex()) { - Visit(ir.GetAmendNode(*amend_index)).CheckVoid(); - } - // It's invalid to call conditional on nested nodes, use an operation instead - code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool()); - ++code.scope; - - VisitBlock(conditional->GetCode()); - - --code.scope; - code.AddLine("}}"); - return {}; - } - - if (const auto comment = std::get_if<CommentNode>(&*node)) { - code.AddLine("// " + comment->GetText()); - return {}; - } - - UNREACHABLE(); - return {}; - } - - Expression ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { - const auto GeometryPass = [&](std::string_view name) { - if (stage == ShaderType::Geometry && buffer) { - // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games - // set an 0x80000000 index for those and the shader fails to build. Find out why - // this happens and what's its intent. 
- return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(), - max_input_vertices.value()); - } - return std::string(name); - }; - - switch (attribute) { - case Attribute::Index::Position: - switch (stage) { - case ShaderType::Geometry: - return {fmt::format("gl_in[{}].gl_Position{}", Visit(buffer).AsUint(), - GetSwizzle(element)), - Type::Float}; - case ShaderType::Fragment: - return {"gl_FragCoord"s + GetSwizzle(element), Type::Float}; - default: - UNREACHABLE(); - return {"0", Type::Int}; - } - case Attribute::Index::FrontColor: - return {"gl_Color"s + GetSwizzle(element), Type::Float}; - case Attribute::Index::FrontSecondaryColor: - return {"gl_SecondaryColor"s + GetSwizzle(element), Type::Float}; - case Attribute::Index::PointCoord: - switch (element) { - case 0: - return {"gl_PointCoord.x", Type::Float}; - case 1: - return {"gl_PointCoord.y", Type::Float}; - case 2: - case 3: - return {"0.0f", Type::Float}; - } - UNREACHABLE(); - return {"0", Type::Int}; - case Attribute::Index::TessCoordInstanceIDVertexID: - // TODO(Subv): Find out what the values are for the first two elements when inside a - // vertex shader, and what's the value of the fourth element when inside a Tess Eval - // shader. - ASSERT(stage == ShaderType::Vertex); - switch (element) { - case 2: - // Config pack's first value is instance_id. - return {"gl_InstanceID", Type::Int}; - case 3: - return {"gl_VertexID", Type::Int}; - } - UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); - return {"0", Type::Int}; - case Attribute::Index::FrontFacing: - // TODO(Subv): Find out what the values are for the other elements. - ASSERT(stage == ShaderType::Fragment); - switch (element) { - case 3: - return {"(gl_FrontFacing ? -1 : 0)", Type::Int}; - } - UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); - return {"0", Type::Int}; - default: - if (IsGenericAttribute(attribute)) { - return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element), - Type::Float}; - } - if (IsLegacyTexCoord(attribute)) { - UNIMPLEMENTED_IF(stage == ShaderType::Geometry); - return {fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute), - GetSwizzle(element)), - Type::Float}; - } - break; - } - UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute); - return {"0", Type::Int}; - } - - Expression ApplyPrecise(Operation operation, std::string value, Type type) { - if (!IsPrecise(operation)) { - return {std::move(value), type}; - } - // Old Nvidia drivers have a bug with precise and texture sampling. These are more likely to - // be found in fragment shaders, so we disable precise there. There are vertex shaders that - // also fail to build but nobody seems to care about those. - // Note: Only bugged drivers will skip precise. - const bool disable_precise = device.HasPreciseBug() && stage == ShaderType::Fragment; - - std::string temporary = code.GenerateTemporary(); - code.AddLine("{}{} {} = {};", disable_precise ? 
"" : "precise ", GetTypeString(type), - temporary, value); - return {std::move(temporary), type}; - } - - Expression VisitOperand(Operation operation, std::size_t operand_index) { - const auto& operand = operation[operand_index]; - const bool parent_precise = IsPrecise(operation); - const bool child_precise = IsPrecise(operand); - const bool child_trivial = !std::holds_alternative<OperationNode>(*operand); - if (!parent_precise || child_precise || child_trivial) { - return Visit(operand); - } - - Expression value = Visit(operand); - std::string temporary = code.GenerateTemporary(); - code.AddLine("{} {} = {};", GetTypeString(value.GetType()), temporary, value.GetCode()); - return {std::move(temporary), value.GetType()}; - } - - std::optional<Expression> GetOutputAttribute(const AbufNode* abuf) { - const u32 element = abuf->GetElement(); - switch (const auto attribute = abuf->GetIndex()) { - case Attribute::Index::Position: - return {{"gl_Position"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::LayerViewportPointSize: - switch (element) { - case 0: - UNIMPLEMENTED(); - return std::nullopt; - case 1: - if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { - return std::nullopt; - } - return {{"gl_Layer", Type::Int}}; - case 2: - if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { - return std::nullopt; - } - return {{"gl_ViewportIndex", Type::Int}}; - case 3: - return {{"gl_PointSize", Type::Float}}; - } - return std::nullopt; - case Attribute::Index::FrontColor: - return {{"gl_FrontColor"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::FrontSecondaryColor: - return {{"gl_FrontSecondaryColor"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::BackColor: - return {{"gl_BackColor"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::BackSecondaryColor: - return {{"gl_BackSecondaryColor"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::ClipDistances0123: - return {{fmt::format("gl_ClipDistance[{}]", element), Type::Float}}; - case Attribute::Index::ClipDistances4567: - return {{fmt::format("gl_ClipDistance[{}]", element + 4), Type::Float}}; - default: - if (IsGenericAttribute(attribute)) { - return {{GetGenericOutputAttribute(attribute, element), Type::Float}}; - } - if (IsLegacyTexCoord(attribute)) { - return {{fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute), - GetSwizzle(element)), - Type::Float}}; - } - UNIMPLEMENTED_MSG("Unhandled output attribute: {}", attribute); - return std::nullopt; - } - } - - Expression GenerateUnary(Operation operation, std::string_view func, Type result_type, - Type type_a) { - std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0).As(type_a)); - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - Expression GenerateBinaryInfix(Operation operation, std::string_view func, Type result_type, - Type type_a, Type type_b) { - const std::string op_a = VisitOperand(operation, 0).As(type_a); - const std::string op_b = VisitOperand(operation, 1).As(type_b); - std::string op_str = fmt::format("({} {} {})", op_a, func, op_b); - - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - Expression GenerateBinaryCall(Operation operation, std::string_view func, Type result_type, - Type type_a, Type type_b) { - const std::string op_a = VisitOperand(operation, 0).As(type_a); - const std::string op_b = VisitOperand(operation, 1).As(type_b); - std::string op_str = fmt::format("{}({}, {})", func, op_a, 
op_b); - - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - Expression GenerateTernary(Operation operation, std::string_view func, Type result_type, - Type type_a, Type type_b, Type type_c) { - const std::string op_a = VisitOperand(operation, 0).As(type_a); - const std::string op_b = VisitOperand(operation, 1).As(type_b); - const std::string op_c = VisitOperand(operation, 2).As(type_c); - std::string op_str = fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c); - - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - Expression GenerateQuaternary(Operation operation, const std::string& func, Type result_type, - Type type_a, Type type_b, Type type_c, Type type_d) { - const std::string op_a = VisitOperand(operation, 0).As(type_a); - const std::string op_b = VisitOperand(operation, 1).As(type_b); - const std::string op_c = VisitOperand(operation, 2).As(type_c); - const std::string op_d = VisitOperand(operation, 3).As(type_d); - std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d); - - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - std::string GenerateTexture(Operation operation, const std::string& function_suffix, - const std::vector<TextureIR>& extras, bool separate_dc = false) { - constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"}; - - const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); - ASSERT(meta); - - const std::size_t count = operation.GetOperandsCount(); - const bool has_array = meta->sampler.is_array; - const bool has_shadow = meta->sampler.is_shadow; - const bool workaround_lod_array_shadow_as_grad = - !device.HasTextureShadowLod() && function_suffix == "Lod" && meta->sampler.is_shadow && - ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) || - meta->sampler.type == TextureType::TextureCube); - - std::string expr = "texture"; - - if (workaround_lod_array_shadow_as_grad) { - expr += "Grad"; - } else { - expr += function_suffix; - } - - if (!meta->aoffi.empty()) { - expr += "Offset"; - } else if (!meta->ptp.empty()) { - expr += "Offsets"; - } - if (!meta->sampler.is_indexed) { - expr += '(' + GetSampler(meta->sampler) + ", "; - } else { - expr += '(' + GetSampler(meta->sampler) + '[' + Visit(meta->index).AsUint() + "], "; - } - expr += coord_constructors.at(count + (has_array ? 1 : 0) + - (has_shadow && !separate_dc ? 
1 : 0) - 1); - expr += '('; - for (std::size_t i = 0; i < count; ++i) { - expr += Visit(operation[i]).AsFloat(); - - const std::size_t next = i + 1; - if (next < count) - expr += ", "; - } - if (has_array) { - expr += ", float(" + Visit(meta->array).AsInt() + ')'; - } - if (has_shadow) { - if (separate_dc) { - expr += "), " + Visit(meta->depth_compare).AsFloat(); - } else { - expr += ", " + Visit(meta->depth_compare).AsFloat() + ')'; - } - } else { - expr += ')'; - } - - if (workaround_lod_array_shadow_as_grad) { - switch (meta->sampler.type) { - case TextureType::Texture2D: - return expr + ", vec2(0.0), vec2(0.0))"; - case TextureType::TextureCube: - return expr + ", vec3(0.0), vec3(0.0))"; - default: - UNREACHABLE(); - break; - } - } - - for (const auto& variant : extras) { - if (const auto argument = std::get_if<TextureArgument>(&variant)) { - expr += GenerateTextureArgument(*argument); - } else if (std::holds_alternative<TextureOffset>(variant)) { - if (!meta->aoffi.empty()) { - expr += GenerateTextureAoffi(meta->aoffi); - } else if (!meta->ptp.empty()) { - expr += GenerateTexturePtp(meta->ptp); - } - } else if (std::holds_alternative<TextureDerivates>(variant)) { - expr += GenerateTextureDerivates(meta->derivates); - } else { - UNREACHABLE(); - } - } - - return expr + ')'; - } - - std::string GenerateTextureArgument(const TextureArgument& argument) { - const auto& [type, operand] = argument; - if (operand == nullptr) { - return {}; - } - - std::string expr = ", "; - switch (type) { - case Type::Int: - if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) { - // Inline the string as an immediate integer in GLSL (some extra arguments are - // required to be constant) - expr += std::to_string(static_cast<s32>(immediate->GetValue())); - } else { - expr += Visit(operand).AsInt(); - } - break; - case Type::Float: - expr += Visit(operand).AsFloat(); - break; - default: { - const auto type_int = static_cast<u32>(type); - UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int); - expr += '0'; - break; - } - } - return expr; - } - - std::string ReadTextureOffset(const Node& value) { - if (const auto immediate = std::get_if<ImmediateNode>(&*value)) { - // Inline the string as an immediate integer in GLSL (AOFFI arguments are required - // to be constant by the standard). - return std::to_string(static_cast<s32>(immediate->GetValue())); - } else if (device.HasVariableAoffi()) { - // Avoid using variable AOFFI on unsupported devices. - return Visit(value).AsInt(); - } else { - // Insert 0 on devices not supporting variable AOFFI. - return "0"; - } - } - - std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) { - if (aoffi.empty()) { - return {}; - } - constexpr std::array coord_constructors = {"int", "ivec2", "ivec3"}; - std::string expr = ", "; - expr += coord_constructors.at(aoffi.size() - 1); - expr += '('; - - for (std::size_t index = 0; index < aoffi.size(); ++index) { - expr += ReadTextureOffset(aoffi.at(index)); - if (index + 1 < aoffi.size()) { - expr += ", "; - } - } - expr += ')'; - - return expr; - } - - std::string GenerateTexturePtp(const std::vector<Node>& ptp) { - static constexpr std::size_t num_vectors = 4; - ASSERT(ptp.size() == num_vectors * 2); - - std::string expr = ", ivec2[]("; - for (std::size_t vector = 0; vector < num_vectors; ++vector) { - const bool has_next = vector + 1 < num_vectors; - expr += fmt::format("ivec2({}, {}){}", ReadTextureOffset(ptp.at(vector * 2)), - ReadTextureOffset(ptp.at(vector * 2 + 1)), has_next ? 
", " : ""); - } - expr += ')'; - return expr; - } - - std::string GenerateTextureDerivates(const std::vector<Node>& derivates) { - if (derivates.empty()) { - return {}; - } - constexpr std::array coord_constructors = {"float", "vec2", "vec3"}; - std::string expr = ", "; - const std::size_t components = derivates.size() / 2; - std::string dx = coord_constructors.at(components - 1); - std::string dy = coord_constructors.at(components - 1); - dx += '('; - dy += '('; - - for (std::size_t index = 0; index < components; ++index) { - const auto& operand_x{derivates.at(index * 2)}; - const auto& operand_y{derivates.at(index * 2 + 1)}; - dx += Visit(operand_x).AsFloat(); - dy += Visit(operand_y).AsFloat(); - - if (index + 1 < components) { - dx += ", "; - dy += ", "; - } - } - dx += ')'; - dy += ')'; - expr += dx + ", " + dy; - - return expr; - } - - std::string BuildIntegerCoordinates(Operation operation) { - constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; - const std::size_t coords_count{operation.GetOperandsCount()}; - std::string expr = constructors.at(coords_count - 1); - for (std::size_t i = 0; i < coords_count; ++i) { - expr += VisitOperand(operation, i).AsInt(); - if (i + 1 < coords_count) { - expr += ", "; - } - } - expr += ')'; - return expr; - } - - std::string BuildImageValues(Operation operation) { - constexpr std::array constructors{"uint", "uvec2", "uvec3", "uvec4"}; - const auto& meta{std::get<MetaImage>(operation.GetMeta())}; - - const std::size_t values_count{meta.values.size()}; - std::string expr = fmt::format("{}(", constructors.at(values_count - 1)); - for (std::size_t i = 0; i < values_count; ++i) { - expr += Visit(meta.values.at(i)).AsUint(); - if (i + 1 < values_count) { - expr += ", "; - } - } - expr += ')'; - return expr; - } - - Expression Assign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - Expression target; - if (const auto gpr = std::get_if<GprNode>(&*dest)) { - if (gpr->GetIndex() == Register::ZeroIndex) { - // Writing to Register::ZeroIndex is a no op but we still have to visit the source - // as it might have side effects. 
- code.AddLine("{};", Visit(src).GetCode()); - return {}; - } - target = {GetRegister(gpr->GetIndex()), Type::Float}; - } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { - UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); - auto output = GetOutputAttribute(abuf); - if (!output) { - return {}; - } - target = std::move(*output); - } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { - target = { - fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), - Type::Uint}; - } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { - ASSERT(stage == ShaderType::Compute); - target = {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint}; - } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { - const std::string real = Visit(gmem->GetRealAddress()).AsUint(); - const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); - const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); - target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), - Type::Uint}; - } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) { - target = {GetCustomVariable(cv->GetIndex()), Type::Float}; - } else { - UNREACHABLE_MSG("Assign called without a proper target"); - } - - code.AddLine("{} = {};", target.GetCode(), Visit(src).As(target.GetType())); - return {}; - } - - template <Type type> - Expression Add(Operation operation) { - return GenerateBinaryInfix(operation, "+", type, type, type); - } - - template <Type type> - Expression Mul(Operation operation) { - return GenerateBinaryInfix(operation, "*", type, type, type); - } - - template <Type type> - Expression Div(Operation operation) { - return GenerateBinaryInfix(operation, "/", type, type, type); - } - - template <Type type> - Expression Fma(Operation operation) { - return GenerateTernary(operation, "fma", type, type, type, type); - } - - template <Type type> - Expression Negate(Operation operation) { - return GenerateUnary(operation, "-", type, type); - } - - template <Type type> - Expression Absolute(Operation operation) { - return GenerateUnary(operation, "abs", type, type); - } - - Expression FClamp(Operation operation) { - return GenerateTernary(operation, "clamp", Type::Float, Type::Float, Type::Float, - Type::Float); - } - - Expression FCastHalf0(Operation operation) { - return {fmt::format("({})[0]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; - } - - Expression FCastHalf1(Operation operation) { - return {fmt::format("({})[1]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; - } - - template <Type type> - Expression Min(Operation operation) { - return GenerateBinaryCall(operation, "min", type, type, type); - } - - template <Type type> - Expression Max(Operation operation) { - return GenerateBinaryCall(operation, "max", type, type, type); - } - - Expression Select(Operation operation) { - const std::string condition = Visit(operation[0]).AsBool(); - const std::string true_case = Visit(operation[1]).AsUint(); - const std::string false_case = Visit(operation[2]).AsUint(); - std::string op_str = fmt::format("({} ? 
{} : {})", condition, true_case, false_case); - - return ApplyPrecise(operation, std::move(op_str), Type::Uint); - } - - Expression FCos(Operation operation) { - return GenerateUnary(operation, "cos", Type::Float, Type::Float); - } - - Expression FSin(Operation operation) { - return GenerateUnary(operation, "sin", Type::Float, Type::Float); - } - - Expression FExp2(Operation operation) { - return GenerateUnary(operation, "exp2", Type::Float, Type::Float); - } - - Expression FLog2(Operation operation) { - return GenerateUnary(operation, "log2", Type::Float, Type::Float); - } - - Expression FInverseSqrt(Operation operation) { - return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float); - } - - Expression FSqrt(Operation operation) { - return GenerateUnary(operation, "sqrt", Type::Float, Type::Float); - } - - Expression FRoundEven(Operation operation) { - return GenerateUnary(operation, "roundEven", Type::Float, Type::Float); - } - - Expression FFloor(Operation operation) { - return GenerateUnary(operation, "floor", Type::Float, Type::Float); - } - - Expression FCeil(Operation operation) { - return GenerateUnary(operation, "ceil", Type::Float, Type::Float); - } - - Expression FTrunc(Operation operation) { - return GenerateUnary(operation, "trunc", Type::Float, Type::Float); - } - - template <Type type> - Expression FCastInteger(Operation operation) { - return GenerateUnary(operation, "float", Type::Float, type); - } - - Expression FSwizzleAdd(Operation operation) { - const std::string op_a = VisitOperand(operation, 0).AsFloat(); - const std::string op_b = VisitOperand(operation, 1).AsFloat(); - - if (!device.HasShaderBallot()) { - LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); - return {fmt::format("{} + {}", op_a, op_b), Type::Float}; - } - - const std::string instr_mask = VisitOperand(operation, 2).AsUint(); - const std::string mask = code.GenerateTemporary(); - code.AddLine("uint {} = ({} >> ((gl_SubGroupInvocationARB & 3) << 1)) & 3;", mask, - instr_mask); - - const std::string modifier_a = fmt::format("fswzadd_modifiers_a[{}]", mask); - const std::string modifier_b = fmt::format("fswzadd_modifiers_b[{}]", mask); - return {fmt::format("(({} * {}) + ({} * {}))", op_a, modifier_a, op_b, modifier_b), - Type::Float}; - } - - Expression ICastFloat(Operation operation) { - return GenerateUnary(operation, "int", Type::Int, Type::Float); - } - - Expression ICastUnsigned(Operation operation) { - return GenerateUnary(operation, "int", Type::Int, Type::Uint); - } - - template <Type type> - Expression LogicalShiftLeft(Operation operation) { - return GenerateBinaryInfix(operation, "<<", type, type, Type::Uint); - } - - Expression ILogicalShiftRight(Operation operation) { - const std::string op_a = VisitOperand(operation, 0).AsUint(); - const std::string op_b = VisitOperand(operation, 1).AsUint(); - std::string op_str = fmt::format("int({} >> {})", op_a, op_b); - - return ApplyPrecise(operation, std::move(op_str), Type::Int); - } - - Expression IArithmeticShiftRight(Operation operation) { - return GenerateBinaryInfix(operation, ">>", Type::Int, Type::Int, Type::Uint); - } - - template <Type type> - Expression BitwiseAnd(Operation operation) { - return GenerateBinaryInfix(operation, "&", type, type, type); - } - - template <Type type> - Expression BitwiseOr(Operation operation) { - return GenerateBinaryInfix(operation, "|", type, type, type); - } - - template <Type type> - Expression BitwiseXor(Operation operation) { - return 
GenerateBinaryInfix(operation, "^", type, type, type); - } - - template <Type type> - Expression BitwiseNot(Operation operation) { - return GenerateUnary(operation, "~", type, type); - } - - Expression UCastFloat(Operation operation) { - return GenerateUnary(operation, "uint", Type::Uint, Type::Float); - } - - Expression UCastSigned(Operation operation) { - return GenerateUnary(operation, "uint", Type::Uint, Type::Int); - } - - Expression UShiftRight(Operation operation) { - return GenerateBinaryInfix(operation, ">>", Type::Uint, Type::Uint, Type::Uint); - } - - template <Type type> - Expression BitfieldInsert(Operation operation) { - return GenerateQuaternary(operation, "bitfieldInsert", type, type, type, Type::Int, - Type::Int); - } - - template <Type type> - Expression BitfieldExtract(Operation operation) { - return GenerateTernary(operation, "bitfieldExtract", type, type, Type::Int, Type::Int); - } - - template <Type type> - Expression BitCount(Operation operation) { - return GenerateUnary(operation, "bitCount", type, type); - } - - template <Type type> - Expression BitMSB(Operation operation) { - return GenerateUnary(operation, "findMSB", type, type); - } - - Expression HNegate(Operation operation) { - const auto GetNegate = [&](std::size_t index) { - return VisitOperand(operation, index).AsBool() + " ? -1 : 1"; - }; - return {fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0).AsHalfFloat(), - GetNegate(1), GetNegate(2)), - Type::HalfFloat}; - } - - Expression HClamp(Operation operation) { - const std::string value = VisitOperand(operation, 0).AsHalfFloat(); - const std::string min = VisitOperand(operation, 1).AsFloat(); - const std::string max = VisitOperand(operation, 2).AsFloat(); - std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max); - - return ApplyPrecise(operation, std::move(clamped), Type::HalfFloat); - } - - Expression HCastFloat(Operation operation) { - return {fmt::format("vec2({}, 0.0f)", VisitOperand(operation, 0).AsFloat()), - Type::HalfFloat}; - } - - Expression HUnpack(Operation operation) { - Expression operand = VisitOperand(operation, 0); - switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) { - case Tegra::Shader::HalfType::H0_H1: - return operand; - case Tegra::Shader::HalfType::F32: - return {fmt::format("vec2({})", operand.AsFloat()), Type::HalfFloat}; - case Tegra::Shader::HalfType::H0_H0: - return {fmt::format("vec2({}[0])", operand.AsHalfFloat()), Type::HalfFloat}; - case Tegra::Shader::HalfType::H1_H1: - return {fmt::format("vec2({}[1])", operand.AsHalfFloat()), Type::HalfFloat}; - } - UNREACHABLE(); - return {"0", Type::Int}; - } - - Expression HMergeF32(Operation operation) { - return {fmt::format("float({}[0])", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; - } - - Expression HMergeH0(Operation operation) { - const std::string dest = VisitOperand(operation, 0).AsUint(); - const std::string src = VisitOperand(operation, 1).AsUint(); - return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", src, dest), - Type::HalfFloat}; - } - - Expression HMergeH1(Operation operation) { - const std::string dest = VisitOperand(operation, 0).AsUint(); - const std::string src = VisitOperand(operation, 1).AsUint(); - return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", dest, src), - Type::HalfFloat}; - } - - Expression HPack2(Operation operation) { - return {fmt::format("vec2({}, {})", VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - 
Type::HalfFloat}; - } - - template <const std::string_view& op, Type type, bool unordered = false> - Expression Comparison(Operation operation) { - static_assert(!unordered || type == Type::Float); - - Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type); - - if constexpr (op.compare("!=") == 0 && type == Type::Float && !unordered) { - // GLSL's operator!=(float, float) doesn't seem to be ordered. This happens on both AMD's - // and Nvidia's proprietary stacks. Manually force an ordered comparison. - return {fmt::format("({} && !isnan({}) && !isnan({}))", expr.AsBool(), - VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::Bool}; - } - if constexpr (!unordered) { - return expr; - } - // Unordered comparisons are always true for NaN operands. - return {fmt::format("({} || isnan({}) || isnan({}))", expr.AsBool(), - VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::Bool}; - } - - Expression FOrdered(Operation operation) { - return {fmt::format("(!isnan({}) && !isnan({}))", VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::Bool}; - } - - Expression FUnordered(Operation operation) { - return {fmt::format("(isnan({}) || isnan({}))", VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::Bool}; - } - - Expression LogicalAddCarry(Operation operation) { - const std::string carry = code.GenerateTemporary(); - code.AddLine("uint {};", carry); - code.AddLine("uaddCarry({}, {}, {});", VisitOperand(operation, 0).AsUint(), - VisitOperand(operation, 1).AsUint(), carry); - return {fmt::format("({} != 0)", carry), Type::Bool}; - } - - Expression LogicalAssign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - std::string target; - - if (const auto pred = std::get_if<PredicateNode>(&*dest)) { - ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); - - const auto index = pred->GetIndex(); - switch (index) { - case Tegra::Shader::Pred::NeverExecute: - case Tegra::Shader::Pred::UnusedIndex: - // Writing to these predicates is a no-op - return {}; - } - target = GetPredicate(index); - } else if (const auto flag = std::get_if<InternalFlagNode>(&*dest)) { - target = GetInternalFlag(flag->GetFlag()); - } - - code.AddLine("{} = {};", target, Visit(src).AsBool()); - return {}; - } - - Expression LogicalAnd(Operation operation) { - return GenerateBinaryInfix(operation, "&&", Type::Bool, Type::Bool, Type::Bool); - } - - Expression LogicalOr(Operation operation) { - return GenerateBinaryInfix(operation, "||", Type::Bool, Type::Bool, Type::Bool); - } - - Expression LogicalXor(Operation operation) { - return GenerateBinaryInfix(operation, "^^", Type::Bool, Type::Bool, Type::Bool); - } - - Expression LogicalNegate(Operation operation) { - return GenerateUnary(operation, "!", Type::Bool, Type::Bool); - } - - Expression LogicalPick2(Operation operation) { - return {fmt::format("{}[{}]", VisitOperand(operation, 0).AsBool2(), - VisitOperand(operation, 1).AsUint()), - Type::Bool}; - } - - Expression LogicalAnd2(Operation operation) { - return GenerateUnary(operation, "all", Type::Bool, Type::Bool2); - } - - template <bool with_nan> - Expression GenerateHalfComparison(Operation operation, std::string_view compare_op) { - Expression comparison = GenerateBinaryCall(operation, compare_op, Type::Bool2, - Type::HalfFloat, Type::HalfFloat); - if constexpr (!with_nan) { - return comparison; - } - return
{fmt::format("HalfFloatNanComparison({}, {}, {})", comparison.AsBool2(), - VisitOperand(operation, 0).AsHalfFloat(), - VisitOperand(operation, 1).AsHalfFloat()), - Type::Bool2}; - } - - template <bool with_nan> - Expression Logical2HLessThan(Operation operation) { - return GenerateHalfComparison<with_nan>(operation, "lessThan"); - } - - template <bool with_nan> - Expression Logical2HEqual(Operation operation) { - return GenerateHalfComparison<with_nan>(operation, "equal"); - } - - template <bool with_nan> - Expression Logical2HLessEqual(Operation operation) { - return GenerateHalfComparison<with_nan>(operation, "lessThanEqual"); - } - - template <bool with_nan> - Expression Logical2HGreaterThan(Operation operation) { - return GenerateHalfComparison<with_nan>(operation, "greaterThan"); - } - - template <bool with_nan> - Expression Logical2HNotEqual(Operation operation) { - return GenerateHalfComparison<with_nan>(operation, "notEqual"); - } - - template <bool with_nan> - Expression Logical2HGreaterEqual(Operation operation) { - return GenerateHalfComparison<with_nan>(operation, "greaterThanEqual"); - } - - Expression Texture(Operation operation) { - const auto meta = std::get<MetaTexture>(operation.GetMeta()); - const bool separate_dc = meta.sampler.type == TextureType::TextureCube && - meta.sampler.is_array && meta.sampler.is_shadow; - // TODO: Replace this with an array and make GenerateTexture use C++20 std::span - const std::vector<TextureIR> extras{ - TextureOffset{}, - TextureArgument{Type::Float, meta.bias}, - }; - std::string expr = GenerateTexture(operation, "", extras, separate_dc); - if (meta.sampler.is_shadow) { - expr = fmt::format("vec4({})", expr); - } - return {expr + GetSwizzle(meta.element), Type::Float}; - } - - Expression TextureLod(Operation operation) { - const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); - ASSERT(meta); - - std::string expr{}; - - if (!device.HasTextureShadowLod() && meta->sampler.is_shadow && - ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) || - meta->sampler.type == TextureType::TextureCube)) { - LOG_ERROR(Render_OpenGL, - "Device lacks GL_EXT_texture_shadow_lod, using textureGrad as a workaround"); - expr = GenerateTexture(operation, "Lod", {}); - } else { - expr = GenerateTexture(operation, "Lod", - {TextureArgument{Type::Float, meta->lod}, TextureOffset{}}); - } - - if (meta->sampler.is_shadow) { - expr = "vec4(" + expr + ')'; - } - return {expr + GetSwizzle(meta->element), Type::Float}; - } - - Expression TextureGather(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - - const auto type = meta.sampler.is_shadow ? 
Type::Float : Type::Int; - const bool separate_dc = meta.sampler.is_shadow; - - std::vector<TextureIR> ir_; - if (meta.sampler.is_shadow) { - ir_ = {TextureOffset{}}; - } else { - ir_ = {TextureOffset{}, TextureArgument{type, meta.component}}; - } - return {GenerateTexture(operation, "Gather", ir_, separate_dc) + GetSwizzle(meta.element), - Type::Float}; - } - - Expression TextureQueryDimensions(Operation operation) { - const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); - ASSERT(meta); - - const std::string sampler = GetSampler(meta->sampler); - const std::string lod = VisitOperand(operation, 0).AsInt(); - - switch (meta->element) { - case 0: - case 1: - return {fmt::format("textureSize({}, {}){}", sampler, lod, GetSwizzle(meta->element)), - Type::Int}; - case 3: - return {fmt::format("textureQueryLevels({})", sampler), Type::Int}; - } - UNREACHABLE(); - return {"0", Type::Int}; - } - - Expression TextureQueryLod(Operation operation) { - const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); - ASSERT(meta); - - if (meta->element < 2) { - return {fmt::format("int(({} * vec2(256)){})", - GenerateTexture(operation, "QueryLod", {}), - GetSwizzle(meta->element)), - Type::Int}; - } - return {"0", Type::Int}; - } - - Expression TexelFetch(Operation operation) { - constexpr std::array constructors = {"int", "ivec2", "ivec3", "ivec4"}; - const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); - ASSERT(meta); - UNIMPLEMENTED_IF(meta->sampler.is_array); - const std::size_t count = operation.GetOperandsCount(); - - std::string expr = "texelFetch("; - expr += GetSampler(meta->sampler); - expr += ", "; - - expr += constructors.at(operation.GetOperandsCount() + (meta->array ? 1 : 0) - 1); - expr += '('; - for (std::size_t i = 0; i < count; ++i) { - if (i > 0) { - expr += ", "; - } - expr += VisitOperand(operation, i).AsInt(); - } - if (meta->array) { - expr += ", "; - expr += Visit(meta->array).AsInt(); - } - expr += ')'; - - if (meta->lod && !meta->sampler.is_buffer) { - expr += ", "; - expr += Visit(meta->lod).AsInt(); - } - expr += ')'; - expr += GetSwizzle(meta->element); - - return {std::move(expr), Type::Float}; - } - - Expression TextureGradient(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - std::string expr = - GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureOffset{}}); - return {std::move(expr) + GetSwizzle(meta.element), Type::Float}; - } - - Expression ImageLoad(Operation operation) { - if (!device.HasImageLoadFormatted()) { - LOG_ERROR(Render_OpenGL, - "Device lacks GL_EXT_shader_image_load_formatted, stubbing image load"); - return {"0", Type::Int}; - } - - const auto& meta{std::get<MetaImage>(operation.GetMeta())}; - return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image), - BuildIntegerCoordinates(operation), GetSwizzle(meta.element)), - Type::Uint}; - } - - Expression ImageStore(Operation operation) { - const auto& meta{std::get<MetaImage>(operation.GetMeta())}; - code.AddLine("imageStore({}, {}, {});", GetImage(meta.image), - BuildIntegerCoordinates(operation), BuildImageValues(operation)); - return {}; - } - - template <const std::string_view& opname> - Expression AtomicImage(Operation operation) { - const auto& meta{std::get<MetaImage>(operation.GetMeta())}; - ASSERT(meta.values.size() == 1); - - return {fmt::format("imageAtomic{}({}, {}, {})", opname, GetImage(meta.image), - BuildIntegerCoordinates(operation), Visit(meta.values[0]).AsUint()), - Type::Uint}; - } - - template <const 
std::string_view& opname, Type type> - Expression Atomic(Operation operation) { - if ((opname == Func::Min || opname == Func::Max) && type == Type::Int) { - UNIMPLEMENTED_MSG("Unimplemented Min & Max for atomic operations"); - return {}; - } - return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(), - Visit(operation[1]).AsUint()), - Type::Uint}; - } - - template <const std::string_view& opname, Type type> - Expression Reduce(Operation operation) { - code.AddLine("{};", Atomic<opname, type>(operation).GetCode()); - return {}; - } - - Expression Branch(Operation operation) { - const auto target = std::get_if<ImmediateNode>(&*operation[0]); - UNIMPLEMENTED_IF(!target); - - code.AddLine("jmp_to = 0x{:X}U;", target->GetValue()); - code.AddLine("break;"); - return {}; - } - - Expression BranchIndirect(Operation operation) { - const std::string op_a = VisitOperand(operation, 0).AsUint(); - - code.AddLine("jmp_to = {};", op_a); - code.AddLine("break;"); - return {}; - } - - Expression PushFlowStack(Operation operation) { - const auto stack = std::get<MetaStackClass>(operation.GetMeta()); - const auto target = std::get_if<ImmediateNode>(&*operation[0]); - UNIMPLEMENTED_IF(!target); - - code.AddLine("{}[{}++] = 0x{:X}U;", FlowStackName(stack), FlowStackTopName(stack), - target->GetValue()); - return {}; - } - - Expression PopFlowStack(Operation operation) { - const auto stack = std::get<MetaStackClass>(operation.GetMeta()); - code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack)); - code.AddLine("break;"); - return {}; - } - - void PreExit() { - if (stage != ShaderType::Fragment) { - return; - } - const auto& used_registers = ir.GetRegisters(); - const auto SafeGetRegister = [&](u32 reg) -> Expression { - // TODO(Rodrigo): Replace with contains once C++20 releases - if (used_registers.find(reg) != used_registers.end()) { - return {GetRegister(reg), Type::Float}; - } - return {"0.0f", Type::Float}; - }; - - UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented"); - - // Write the color outputs using the data in the shader registers, disabled - // rendertargets/components are skipped in the register assignment. - u32 current_reg = 0; - for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) { - // TODO(Subv): Figure out how dual-source blending is configured in the Switch. - for (u32 component = 0; component < 4; ++component) { - if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { - code.AddLine("frag_color{}{} = {};", render_target, GetColorSwizzle(component), - SafeGetRegister(current_reg).AsFloat()); - ++current_reg; - } - } - } - if (header.ps.omap.depth) { - // The depth output is always 2 registers after the last color output, and current_reg - // already contains one past the last color register. - code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1).AsFloat()); - } - } - - Expression Exit(Operation operation) { - PreExit(); - code.AddLine("return;"); - return {}; - } - - Expression Discard(Operation operation) { - // Enclose "discard" in a conditional, so that GLSL compilation does not complain - // about unexecuted instructions that may follow this. 
- code.AddLine("if (true) {{"); - ++code.scope; - code.AddLine("discard;"); - --code.scope; - code.AddLine("}}"); - return {}; - } - - Expression EmitVertex(Operation operation) { - ASSERT_MSG(stage == ShaderType::Geometry, - "EmitVertex is expected to be used in a geometry shader."); - code.AddLine("EmitVertex();"); - return {}; - } - - Expression EndPrimitive(Operation operation) { - ASSERT_MSG(stage == ShaderType::Geometry, - "EndPrimitive is expected to be used in a geometry shader."); - code.AddLine("EndPrimitive();"); - return {}; - } - - Expression InvocationId(Operation operation) { - return {"gl_InvocationID", Type::Int}; - } - - Expression YNegate(Operation operation) { - // Y_NEGATE is mapped to this uniform value - return {"gl_FrontMaterial.ambient.a", Type::Float}; - } - - template <u32 element> - Expression LocalInvocationId(Operation) { - return {"gl_LocalInvocationID"s + GetSwizzle(element), Type::Uint}; - } - - template <u32 element> - Expression WorkGroupId(Operation) { - return {"gl_WorkGroupID"s + GetSwizzle(element), Type::Uint}; - } - - Expression BallotThread(Operation operation) { - const std::string value = VisitOperand(operation, 0).AsBool(); - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); - // Stub on non-Nvidia devices by simulating all threads voting the same as the active - // one. - return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint}; - } - return {fmt::format("ballotThreadNV({})", value), Type::Uint}; - } - - Expression Vote(Operation operation, const char* func) { - const std::string value = VisitOperand(operation, 0).AsBool(); - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); - // Stub with a warp size of one. - return {value, Type::Bool}; - } - return {fmt::format("{}({})", func, value), Type::Bool}; - } - - Expression VoteAll(Operation operation) { - return Vote(operation, "allThreadsNV"); - } - - Expression VoteAny(Operation operation) { - return Vote(operation, "anyThreadNV"); - } - - Expression VoteEqual(Operation operation) { - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); - // We must return true here since a stub for a theoretical warp size of 1. - // This will always return an equal result across all votes. 
- return {"true", Type::Bool}; - } - return Vote(operation, "allThreadsEqualNV"); - } - - Expression ThreadId(Operation operation) { - if (!device.HasShaderBallot()) { - LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); - return {"0U", Type::Uint}; - } - return {"gl_SubGroupInvocationARB", Type::Uint}; - } - - template <const std::string_view& comparison> - Expression ThreadMask(Operation) { - if (device.HasWarpIntrinsics()) { - return {fmt::format("gl_Thread{}MaskNV", comparison), Type::Uint}; - } - if (device.HasShaderBallot()) { - return {fmt::format("uint(gl_SubGroup{}MaskARB)", comparison), Type::Uint}; - } - LOG_ERROR(Render_OpenGL, "Thread mask intrinsics are required by the shader"); - return {"0U", Type::Uint}; - } - - Expression ShuffleIndexed(Operation operation) { - std::string value = VisitOperand(operation, 0).AsFloat(); - - if (!device.HasShaderBallot()) { - LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); - return {std::move(value), Type::Float}; - } - - const std::string index = VisitOperand(operation, 1).AsUint(); - return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float}; - } - - Expression Barrier(Operation) { - if (!ir.IsDecompiled()) { - LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled"); - return {}; - } - code.AddLine("barrier();"); - return {}; - } - - Expression MemoryBarrierGroup(Operation) { - code.AddLine("groupMemoryBarrier();"); - return {}; - } - - Expression MemoryBarrierGlobal(Operation) { - code.AddLine("memoryBarrier();"); - return {}; - } - - struct Func final { - Func() = delete; - ~Func() = delete; - - static constexpr std::string_view LessThan = "<"; - static constexpr std::string_view Equal = "=="; - static constexpr std::string_view LessEqual = "<="; - static constexpr std::string_view GreaterThan = ">"; - static constexpr std::string_view NotEqual = "!="; - static constexpr std::string_view GreaterEqual = ">="; - - static constexpr std::string_view Eq = "Eq"; - static constexpr std::string_view Ge = "Ge"; - static constexpr std::string_view Gt = "Gt"; - static constexpr std::string_view Le = "Le"; - static constexpr std::string_view Lt = "Lt"; - - static constexpr std::string_view Add = "Add"; - static constexpr std::string_view Min = "Min"; - static constexpr std::string_view Max = "Max"; - static constexpr std::string_view And = "And"; - static constexpr std::string_view Or = "Or"; - static constexpr std::string_view Xor = "Xor"; - static constexpr std::string_view Exchange = "Exchange"; - }; - - static constexpr std::array operation_decompilers = { - &GLSLDecompiler::Assign, - - &GLSLDecompiler::Select, - - &GLSLDecompiler::Add<Type::Float>, - &GLSLDecompiler::Mul<Type::Float>, - &GLSLDecompiler::Div<Type::Float>, - &GLSLDecompiler::Fma<Type::Float>, - &GLSLDecompiler::Negate<Type::Float>, - &GLSLDecompiler::Absolute<Type::Float>, - &GLSLDecompiler::FClamp, - &GLSLDecompiler::FCastHalf0, - &GLSLDecompiler::FCastHalf1, - &GLSLDecompiler::Min<Type::Float>, - &GLSLDecompiler::Max<Type::Float>, - &GLSLDecompiler::FCos, - &GLSLDecompiler::FSin, - &GLSLDecompiler::FExp2, - &GLSLDecompiler::FLog2, - &GLSLDecompiler::FInverseSqrt, - &GLSLDecompiler::FSqrt, - &GLSLDecompiler::FRoundEven, - &GLSLDecompiler::FFloor, - &GLSLDecompiler::FCeil, - &GLSLDecompiler::FTrunc, - &GLSLDecompiler::FCastInteger<Type::Int>, - &GLSLDecompiler::FCastInteger<Type::Uint>, - &GLSLDecompiler::FSwizzleAdd, - - &GLSLDecompiler::Add<Type::Int>, - 
&GLSLDecompiler::Mul<Type::Int>, - &GLSLDecompiler::Div<Type::Int>, - &GLSLDecompiler::Negate<Type::Int>, - &GLSLDecompiler::Absolute<Type::Int>, - &GLSLDecompiler::Min<Type::Int>, - &GLSLDecompiler::Max<Type::Int>, - - &GLSLDecompiler::ICastFloat, - &GLSLDecompiler::ICastUnsigned, - &GLSLDecompiler::LogicalShiftLeft<Type::Int>, - &GLSLDecompiler::ILogicalShiftRight, - &GLSLDecompiler::IArithmeticShiftRight, - &GLSLDecompiler::BitwiseAnd<Type::Int>, - &GLSLDecompiler::BitwiseOr<Type::Int>, - &GLSLDecompiler::BitwiseXor<Type::Int>, - &GLSLDecompiler::BitwiseNot<Type::Int>, - &GLSLDecompiler::BitfieldInsert<Type::Int>, - &GLSLDecompiler::BitfieldExtract<Type::Int>, - &GLSLDecompiler::BitCount<Type::Int>, - &GLSLDecompiler::BitMSB<Type::Int>, - - &GLSLDecompiler::Add<Type::Uint>, - &GLSLDecompiler::Mul<Type::Uint>, - &GLSLDecompiler::Div<Type::Uint>, - &GLSLDecompiler::Min<Type::Uint>, - &GLSLDecompiler::Max<Type::Uint>, - &GLSLDecompiler::UCastFloat, - &GLSLDecompiler::UCastSigned, - &GLSLDecompiler::LogicalShiftLeft<Type::Uint>, - &GLSLDecompiler::UShiftRight, - &GLSLDecompiler::UShiftRight, - &GLSLDecompiler::BitwiseAnd<Type::Uint>, - &GLSLDecompiler::BitwiseOr<Type::Uint>, - &GLSLDecompiler::BitwiseXor<Type::Uint>, - &GLSLDecompiler::BitwiseNot<Type::Uint>, - &GLSLDecompiler::BitfieldInsert<Type::Uint>, - &GLSLDecompiler::BitfieldExtract<Type::Uint>, - &GLSLDecompiler::BitCount<Type::Uint>, - &GLSLDecompiler::BitMSB<Type::Uint>, - - &GLSLDecompiler::Add<Type::HalfFloat>, - &GLSLDecompiler::Mul<Type::HalfFloat>, - &GLSLDecompiler::Fma<Type::HalfFloat>, - &GLSLDecompiler::Absolute<Type::HalfFloat>, - &GLSLDecompiler::HNegate, - &GLSLDecompiler::HClamp, - &GLSLDecompiler::HCastFloat, - &GLSLDecompiler::HUnpack, - &GLSLDecompiler::HMergeF32, - &GLSLDecompiler::HMergeH0, - &GLSLDecompiler::HMergeH1, - &GLSLDecompiler::HPack2, - - &GLSLDecompiler::LogicalAssign, - &GLSLDecompiler::LogicalAnd, - &GLSLDecompiler::LogicalOr, - &GLSLDecompiler::LogicalXor, - &GLSLDecompiler::LogicalNegate, - &GLSLDecompiler::LogicalPick2, - &GLSLDecompiler::LogicalAnd2, - - &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, false>, - &GLSLDecompiler::Comparison<Func::Equal, Type::Float, false>, - &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, false>, - &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, false>, - &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, false>, - &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, false>, - &GLSLDecompiler::FOrdered, - &GLSLDecompiler::FUnordered, - &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, true>, - &GLSLDecompiler::Comparison<Func::Equal, Type::Float, true>, - &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, true>, - &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, true>, - &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, true>, - &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, true>, - - &GLSLDecompiler::Comparison<Func::LessThan, Type::Int>, - &GLSLDecompiler::Comparison<Func::Equal, Type::Int>, - &GLSLDecompiler::Comparison<Func::LessEqual, Type::Int>, - &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Int>, - &GLSLDecompiler::Comparison<Func::NotEqual, Type::Int>, - &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Int>, - - &GLSLDecompiler::Comparison<Func::LessThan, Type::Uint>, - &GLSLDecompiler::Comparison<Func::Equal, Type::Uint>, - &GLSLDecompiler::Comparison<Func::LessEqual, Type::Uint>, - &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Uint>, - 
&GLSLDecompiler::Comparison<Func::NotEqual, Type::Uint>, - &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Uint>, - - &GLSLDecompiler::LogicalAddCarry, - - &GLSLDecompiler::Logical2HLessThan<false>, - &GLSLDecompiler::Logical2HEqual<false>, - &GLSLDecompiler::Logical2HLessEqual<false>, - &GLSLDecompiler::Logical2HGreaterThan<false>, - &GLSLDecompiler::Logical2HNotEqual<false>, - &GLSLDecompiler::Logical2HGreaterEqual<false>, - &GLSLDecompiler::Logical2HLessThan<true>, - &GLSLDecompiler::Logical2HEqual<true>, - &GLSLDecompiler::Logical2HLessEqual<true>, - &GLSLDecompiler::Logical2HGreaterThan<true>, - &GLSLDecompiler::Logical2HNotEqual<true>, - &GLSLDecompiler::Logical2HGreaterEqual<true>, - - &GLSLDecompiler::Texture, - &GLSLDecompiler::TextureLod, - &GLSLDecompiler::TextureGather, - &GLSLDecompiler::TextureQueryDimensions, - &GLSLDecompiler::TextureQueryLod, - &GLSLDecompiler::TexelFetch, - &GLSLDecompiler::TextureGradient, - - &GLSLDecompiler::ImageLoad, - &GLSLDecompiler::ImageStore, - - &GLSLDecompiler::AtomicImage<Func::Add>, - &GLSLDecompiler::AtomicImage<Func::And>, - &GLSLDecompiler::AtomicImage<Func::Or>, - &GLSLDecompiler::AtomicImage<Func::Xor>, - &GLSLDecompiler::AtomicImage<Func::Exchange>, - - &GLSLDecompiler::Atomic<Func::Exchange, Type::Uint>, - &GLSLDecompiler::Atomic<Func::Add, Type::Uint>, - &GLSLDecompiler::Atomic<Func::Min, Type::Uint>, - &GLSLDecompiler::Atomic<Func::Max, Type::Uint>, - &GLSLDecompiler::Atomic<Func::And, Type::Uint>, - &GLSLDecompiler::Atomic<Func::Or, Type::Uint>, - &GLSLDecompiler::Atomic<Func::Xor, Type::Uint>, - - &GLSLDecompiler::Atomic<Func::Exchange, Type::Int>, - &GLSLDecompiler::Atomic<Func::Add, Type::Int>, - &GLSLDecompiler::Atomic<Func::Min, Type::Int>, - &GLSLDecompiler::Atomic<Func::Max, Type::Int>, - &GLSLDecompiler::Atomic<Func::And, Type::Int>, - &GLSLDecompiler::Atomic<Func::Or, Type::Int>, - &GLSLDecompiler::Atomic<Func::Xor, Type::Int>, - - &GLSLDecompiler::Reduce<Func::Add, Type::Uint>, - &GLSLDecompiler::Reduce<Func::Min, Type::Uint>, - &GLSLDecompiler::Reduce<Func::Max, Type::Uint>, - &GLSLDecompiler::Reduce<Func::And, Type::Uint>, - &GLSLDecompiler::Reduce<Func::Or, Type::Uint>, - &GLSLDecompiler::Reduce<Func::Xor, Type::Uint>, - - &GLSLDecompiler::Reduce<Func::Add, Type::Int>, - &GLSLDecompiler::Reduce<Func::Min, Type::Int>, - &GLSLDecompiler::Reduce<Func::Max, Type::Int>, - &GLSLDecompiler::Reduce<Func::And, Type::Int>, - &GLSLDecompiler::Reduce<Func::Or, Type::Int>, - &GLSLDecompiler::Reduce<Func::Xor, Type::Int>, - - &GLSLDecompiler::Branch, - &GLSLDecompiler::BranchIndirect, - &GLSLDecompiler::PushFlowStack, - &GLSLDecompiler::PopFlowStack, - &GLSLDecompiler::Exit, - &GLSLDecompiler::Discard, - - &GLSLDecompiler::EmitVertex, - &GLSLDecompiler::EndPrimitive, - - &GLSLDecompiler::InvocationId, - &GLSLDecompiler::YNegate, - &GLSLDecompiler::LocalInvocationId<0>, - &GLSLDecompiler::LocalInvocationId<1>, - &GLSLDecompiler::LocalInvocationId<2>, - &GLSLDecompiler::WorkGroupId<0>, - &GLSLDecompiler::WorkGroupId<1>, - &GLSLDecompiler::WorkGroupId<2>, - - &GLSLDecompiler::BallotThread, - &GLSLDecompiler::VoteAll, - &GLSLDecompiler::VoteAny, - &GLSLDecompiler::VoteEqual, - - &GLSLDecompiler::ThreadId, - &GLSLDecompiler::ThreadMask<Func::Eq>, - &GLSLDecompiler::ThreadMask<Func::Ge>, - &GLSLDecompiler::ThreadMask<Func::Gt>, - &GLSLDecompiler::ThreadMask<Func::Le>, - &GLSLDecompiler::ThreadMask<Func::Lt>, - &GLSLDecompiler::ShuffleIndexed, - - &GLSLDecompiler::Barrier, - &GLSLDecompiler::MemoryBarrierGroup, - 
&GLSLDecompiler::MemoryBarrierGlobal, - }; - static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); - - std::string GetRegister(u32 index) const { - return AppendSuffix(index, "gpr"); - } - - std::string GetCustomVariable(u32 index) const { - return AppendSuffix(index, "custom_var"); - } - - std::string GetPredicate(Tegra::Shader::Pred pred) const { - return AppendSuffix(static_cast<u32>(pred), "pred"); - } - - std::string GetGenericInputAttribute(Attribute::Index attribute) const { - return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME); - } - - std::unordered_map<u8, GenericVaryingDescription> varying_description; - - std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const { - const u8 offset = static_cast<u8>(GetGenericAttributeIndex(attribute) * 4 + element); - const auto& description = varying_description.at(offset); - if (description.is_scalar) { - return description.name; - } - return fmt::format("{}[{}]", description.name, element - description.first_element); - } - - std::string GetConstBuffer(u32 index) const { - return AppendSuffix(index, "cbuf"); - } - - std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const { - return fmt::format("gmem_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, suffix); - } - - std::string GetGlobalMemoryBlock(const GlobalMemoryBase& descriptor) const { - return fmt::format("gmem_block_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, - suffix); - } - - std::string GetConstBufferBlock(u32 index) const { - return AppendSuffix(index, "cbuf_block"); - } - - std::string GetLocalMemory() const { - if (suffix.empty()) { - return "lmem"; - } else { - return "lmem_" + std::string{suffix}; - } - } - - std::string GetInternalFlag(InternalFlag flag) const { - constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag", - "overflow_flag"}; - const auto index = static_cast<u32>(flag); - ASSERT(index < static_cast<u32>(InternalFlag::Amount)); - - if (suffix.empty()) { - return InternalFlagNames[index]; - } else { - return fmt::format("{}_{}", InternalFlagNames[index], suffix); - } - } - - std::string GetSampler(const SamplerEntry& sampler) const { - return AppendSuffix(sampler.index, "sampler"); - } - - std::string GetImage(const ImageEntry& image) const { - return AppendSuffix(image.index, "image"); - } - - std::string AppendSuffix(u32 index, std::string_view name) const { - if (suffix.empty()) { - return fmt::format("{}{}", name, index); - } else { - return fmt::format("{}{}_{}", name, index, suffix); - } - } - - u32 GetNumPhysicalInputAttributes() const { - return stage == ShaderType::Vertex ? 
GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); - } - - u32 GetNumPhysicalAttributes() const { - return std::min<u32>(device.GetMaxVertexAttributes(), Maxwell::NumVertexAttributes); - } - - u32 GetNumPhysicalVaryings() const { - return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings); - } - - const Device& device; - const ShaderIR& ir; - const Registry& registry; - const ShaderType stage; - const std::string_view identifier; - const std::string_view suffix; - const Header header; - std::unordered_map<u8, VaryingTFB> transform_feedback; - - ShaderWriter code; - - std::optional<u32> max_input_vertices; -}; - -std::string GetFlowVariable(u32 index) { - return fmt::format("flow_var{}", index); -} - -class ExprDecompiler { -public: - explicit ExprDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {} - - void operator()(const ExprAnd& expr) { - inner += '('; - std::visit(*this, *expr.operand1); - inner += " && "; - std::visit(*this, *expr.operand2); - inner += ')'; - } - - void operator()(const ExprOr& expr) { - inner += '('; - std::visit(*this, *expr.operand1); - inner += " || "; - std::visit(*this, *expr.operand2); - inner += ')'; - } - - void operator()(const ExprNot& expr) { - inner += '!'; - std::visit(*this, *expr.operand1); - } - - void operator()(const ExprPredicate& expr) { - const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate); - inner += decomp.GetPredicate(pred); - } - - void operator()(const ExprCondCode& expr) { - inner += decomp.Visit(decomp.ir.GetConditionCode(expr.cc)).AsBool(); - } - - void operator()(const ExprVar& expr) { - inner += GetFlowVariable(expr.var_index); - } - - void operator()(const ExprBoolean& expr) { - inner += expr.value ? "true" : "false"; - } - - void operator()(VideoCommon::Shader::ExprGprEqual& expr) { - inner += fmt::format("(ftou({}) == {})", decomp.GetRegister(expr.gpr), expr.value); - } - - const std::string& GetResult() const { - return inner; - } - -private: - GLSLDecompiler& decomp; - std::string inner; -}; - -class ASTDecompiler { -public: - explicit ASTDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {} - - void operator()(const ASTProgram& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(const ASTIfThen& ast) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); - decomp.code.scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.code.scope--; - decomp.code.AddLine("}}"); - } - - void operator()(const ASTIfElse& ast) { - decomp.code.AddLine("else {{"); - decomp.code.scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.code.scope--; - decomp.code.AddLine("}}"); - } - - void operator()([[maybe_unused]] const ASTBlockEncoded& ast) { - UNREACHABLE(); - } - - void operator()(const ASTBlockDecoded& ast) { - decomp.VisitBlock(ast.nodes); - } - - void operator()(const ASTVarSet& ast) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("{} = {};", GetFlowVariable(ast.index), expr_parser.GetResult()); - } - - void operator()(const ASTLabel& ast) { - decomp.code.AddLine("// Label_{}:", ast.index); - } - - void operator()([[maybe_unused]] const ASTGoto& ast) { - UNREACHABLE(); - } - - void operator()(const ASTDoWhile& ast) 
{ - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("do {{"); - decomp.code.scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.code.scope--; - decomp.code.AddLine("}} while({});", expr_parser.GetResult()); - } - - void operator()(const ASTReturn& ast) { - const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); - if (!is_true) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); - decomp.code.scope++; - } - if (ast.kills) { - decomp.code.AddLine("discard;"); - } else { - decomp.PreExit(); - decomp.code.AddLine("return;"); - } - if (!is_true) { - decomp.code.scope--; - decomp.code.AddLine("}}"); - } - } - - void operator()(const ASTBreak& ast) { - const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); - if (!is_true) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); - decomp.code.scope++; - } - decomp.code.AddLine("break;"); - if (!is_true) { - decomp.code.scope--; - decomp.code.AddLine("}}"); - } - } - - void Visit(const ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - } - -private: - GLSLDecompiler& decomp; -}; - -void GLSLDecompiler::DecompileAST() { - const u32 num_flow_variables = ir.GetASTNumVariables(); - for (u32 i = 0; i < num_flow_variables; i++) { - code.AddLine("bool {} = false;", GetFlowVariable(i)); - } - - ASTDecompiler decompiler{*this}; - decompiler.Visit(ir.GetASTProgram()); -} - -} // Anonymous namespace - -ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType stage) { - ShaderEntries entries; - for (const auto& cbuf : ir.GetConstantBuffers()) { - entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), - cbuf.first); - } - for (const auto& [base, usage] : ir.GetGlobalMemory()) { - entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read, - usage.is_written); - } - for (const auto& sampler : ir.GetSamplers()) { - entries.samplers.emplace_back(sampler); - } - for (const auto& image : ir.GetImages()) { - entries.images.emplace_back(image); - } - const auto clip_distances = ir.GetClipDistances(); - for (std::size_t i = 0; i < std::size(clip_distances); ++i) { - entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i; - } - for (const auto& buffer : entries.const_buffers) { - entries.enabled_uniform_buffers |= 1U << buffer.GetIndex(); - } - entries.shader_length = ir.GetLength(); - return entries; -} - -std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry, - ShaderType stage, std::string_view identifier, - std::string_view suffix) { - GLSLDecompiler decompiler(device, ir, registry, stage, identifier, suffix); - decompiler.Decompile(); - return decompiler.GetResult(); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h deleted file mode 100644 index 0397a000c..000000000 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
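The ExprDecompiler and ASTDecompiler removed above emit structured GLSL control flow by visiting a variant-based expression tree and appending text to the output as they recurse. A minimal, self-contained sketch of that visitor pattern follows; ExprData, ExprVar, ExprNot, ExprAnd and ExprPrinter are illustrative stand-ins for this note, not the removed types.

#include <memory>
#include <string>
#include <variant>

// Illustrative stand-ins for the removed expression node types.
struct ExprData;
struct ExprVar { unsigned index; };
struct ExprNot { std::shared_ptr<ExprData> operand; };
struct ExprAnd { std::shared_ptr<ExprData> lhs, rhs; };
struct ExprData { std::variant<ExprVar, ExprNot, ExprAnd> data; };

// Accumulates a GLSL boolean expression while visiting the tree, the same
// shape as the removed ExprDecompiler::operator() overloads.
struct ExprPrinter {
    std::string result;
    void operator()(const ExprVar& e) { result += "flow_var" + std::to_string(e.index); }
    void operator()(const ExprNot& e) {
        result += '!';
        std::visit(*this, e.operand->data);
    }
    void operator()(const ExprAnd& e) {
        result += '(';
        std::visit(*this, e.lhs->data);
        result += " && ";
        std::visit(*this, e.rhs->data);
        result += ')';
    }
};

int main() {
    const auto var = std::make_shared<ExprData>(ExprData{ExprVar{3}});
    const auto expr = std::make_shared<ExprData>(ExprData{ExprNot{var}});
    ExprPrinter printer;
    std::visit(printer, expr->data);
    // printer.result is now "!flow_var3"
}

The same visit-and-append approach extends to the remaining node kinds the removed decompiler handles (ExprOr, ExprPredicate, ExprCondCode, ExprGprEqual), each overload emitting its own GLSL fragment.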
- -#pragma once - -#include <array> -#include <string> -#include <string_view> -#include <utility> -#include <vector> -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace OpenGL { - -class Device; - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using SamplerEntry = VideoCommon::Shader::SamplerEntry; -using ImageEntry = VideoCommon::Shader::ImageEntry; - -class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { -public: - explicit ConstBufferEntry(u32 max_offset_, bool is_indirect_, u32 index_) - : ConstBuffer{max_offset_, is_indirect_}, index{index_} {} - - u32 GetIndex() const { - return index; - } - -private: - u32 index = 0; -}; - -struct GlobalMemoryEntry { - constexpr explicit GlobalMemoryEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_read_, - bool is_written_) - : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_read{is_read_}, is_written{ - is_written_} {} - - u32 cbuf_index = 0; - u32 cbuf_offset = 0; - bool is_read = false; - bool is_written = false; -}; - -struct ShaderEntries { - std::vector<ConstBufferEntry> const_buffers; - std::vector<GlobalMemoryEntry> global_memory_entries; - std::vector<SamplerEntry> samplers; - std::vector<ImageEntry> images; - std::size_t shader_length{}; - u32 clip_distances{}; - u32 enabled_uniform_buffers{}; -}; - -ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - Tegra::Engines::ShaderType stage); - -std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - const VideoCommon::Shader::Registry& registry, - Tegra::Engines::ShaderType stage, std::string_view identifier, - std::string_view suffix = {}); - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp deleted file mode 100644 index 0deb86517..000000000 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ /dev/null @@ -1,482 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
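The removed disk cache below writes shaders to the transferable file as flat binary records behind a u32 version header (NativeVersion), and invalidates the whole file whenever any record fails to read. A minimal, self-contained sketch of that length-prefixed pattern is shown here; Entry, SaveEntry and LoadEntry are hypothetical stand-ins for illustration, not the removed API.

#include <cstdint>
#include <fstream>
#include <vector>

// Hypothetical stand-in for one cached record; the real ShaderDiskCacheEntry
// also stores sampler, const-buffer and specialization data.
struct Entry {
    std::uint32_t type = 0;
    std::vector<std::uint64_t> code;
};

// Each record is written as fixed-width fields followed by a length-prefixed payload.
bool SaveEntry(std::ofstream& out, const Entry& e) {
    const auto count = static_cast<std::uint32_t>(e.code.size());
    out.write(reinterpret_cast<const char*>(&e.type), sizeof(e.type));
    out.write(reinterpret_cast<const char*>(&count), sizeof(count));
    out.write(reinterpret_cast<const char*>(e.code.data()), count * sizeof(std::uint64_t));
    return out.good();
}

bool LoadEntry(std::ifstream& in, Entry& e) {
    std::uint32_t count = 0;
    if (!in.read(reinterpret_cast<char*>(&e.type), sizeof(e.type)) ||
        !in.read(reinterpret_cast<char*>(&count), sizeof(count))) {
        return false;  // truncated file: the caller drops the whole cache
    }
    e.code.resize(count);
    return static_cast<bool>(
        in.read(reinterpret_cast<char*>(e.code.data()), count * sizeof(std::uint64_t)));
}

Keeping every field a trivially copyable, fixed-width value is what lets the removed implementation read and write whole vectors of entries (keys, bound samplers, bindless samplers) with single file.Read and file.Write calls.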
- -#include <cstring> - -#include <fmt/format.h> - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/fs/file.h" -#include "common/fs/fs.h" -#include "common/fs/path_util.h" -#include "common/logging/log.h" -#include "common/scm_rev.h" -#include "common/settings.h" -#include "common/zstd_compression.h" -#include "core/core.h" -#include "core/hle/kernel/k_process.h" -#include "video_core/engines/shader_type.h" -#include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/renderer_opengl/gl_shader_disk_cache.h" - -namespace OpenGL { - -using Tegra::Engines::ShaderType; -using VideoCommon::Shader::BindlessSamplerMap; -using VideoCommon::Shader::BoundSamplerMap; -using VideoCommon::Shader::KeyMap; -using VideoCommon::Shader::SeparateSamplerKey; -using ShaderCacheVersionHash = std::array<u8, 64>; - -struct ConstBufferKey { - u32 cbuf = 0; - u32 offset = 0; - u32 value = 0; -}; - -struct BoundSamplerEntry { - u32 offset = 0; - Tegra::Engines::SamplerDescriptor sampler; -}; - -struct SeparateSamplerEntry { - u32 cbuf1 = 0; - u32 cbuf2 = 0; - u32 offset1 = 0; - u32 offset2 = 0; - Tegra::Engines::SamplerDescriptor sampler; -}; - -struct BindlessSamplerEntry { - u32 cbuf = 0; - u32 offset = 0; - Tegra::Engines::SamplerDescriptor sampler; -}; - -namespace { - -constexpr u32 NativeVersion = 21; - -ShaderCacheVersionHash GetShaderCacheVersionHash() { - ShaderCacheVersionHash hash{}; - const std::size_t length = std::min(std::strlen(Common::g_shader_cache_version), hash.size()); - std::memcpy(hash.data(), Common::g_shader_cache_version, length); - return hash; -} - -} // Anonymous namespace - -ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default; - -ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default; - -bool ShaderDiskCacheEntry::Load(Common::FS::IOFile& file) { - if (!file.ReadObject(type)) { - return false; - } - u32 code_size; - u32 code_size_b; - if (!file.ReadObject(code_size) || !file.ReadObject(code_size_b)) { - return false; - } - code.resize(code_size); - code_b.resize(code_size_b); - if (file.Read(code) != code_size) { - return false; - } - if (HasProgramA() && file.Read(code_b) != code_size_b) { - return false; - } - - u8 is_texture_handler_size_known; - u32 texture_handler_size_value; - u32 num_keys; - u32 num_bound_samplers; - u32 num_separate_samplers; - u32 num_bindless_samplers; - if (!file.ReadObject(unique_identifier) || !file.ReadObject(bound_buffer) || - !file.ReadObject(is_texture_handler_size_known) || - !file.ReadObject(texture_handler_size_value) || !file.ReadObject(graphics_info) || - !file.ReadObject(compute_info) || !file.ReadObject(num_keys) || - !file.ReadObject(num_bound_samplers) || !file.ReadObject(num_separate_samplers) || - !file.ReadObject(num_bindless_samplers)) { - return false; - } - if (is_texture_handler_size_known) { - texture_handler_size = texture_handler_size_value; - } - - std::vector<ConstBufferKey> flat_keys(num_keys); - std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers); - std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers); - std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers); - if (file.Read(flat_keys) != flat_keys.size() || - file.Read(flat_bound_samplers) != flat_bound_samplers.size() || - file.Read(flat_separate_samplers) != flat_separate_samplers.size() || - file.Read(flat_bindless_samplers) != flat_bindless_samplers.size()) { - return false; - } - for (const auto& entry : flat_keys) { - keys.insert({{entry.cbuf, 
entry.offset}, entry.value}); - } - for (const auto& entry : flat_bound_samplers) { - bound_samplers.emplace(entry.offset, entry.sampler); - } - for (const auto& entry : flat_separate_samplers) { - SeparateSamplerKey key; - key.buffers = {entry.cbuf1, entry.cbuf2}; - key.offsets = {entry.offset1, entry.offset2}; - separate_samplers.emplace(key, entry.sampler); - } - for (const auto& entry : flat_bindless_samplers) { - bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler}); - } - - return true; -} - -bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const { - if (!file.WriteObject(static_cast<u32>(type)) || - !file.WriteObject(static_cast<u32>(code.size())) || - !file.WriteObject(static_cast<u32>(code_b.size()))) { - return false; - } - if (file.Write(code) != code.size()) { - return false; - } - if (HasProgramA() && file.Write(code_b) != code_b.size()) { - return false; - } - - if (!file.WriteObject(unique_identifier) || !file.WriteObject(bound_buffer) || - !file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) || - !file.WriteObject(texture_handler_size.value_or(0)) || !file.WriteObject(graphics_info) || - !file.WriteObject(compute_info) || !file.WriteObject(static_cast<u32>(keys.size())) || - !file.WriteObject(static_cast<u32>(bound_samplers.size())) || - !file.WriteObject(static_cast<u32>(separate_samplers.size())) || - !file.WriteObject(static_cast<u32>(bindless_samplers.size()))) { - return false; - } - - std::vector<ConstBufferKey> flat_keys; - flat_keys.reserve(keys.size()); - for (const auto& [address, value] : keys) { - flat_keys.push_back(ConstBufferKey{address.first, address.second, value}); - } - - std::vector<BoundSamplerEntry> flat_bound_samplers; - flat_bound_samplers.reserve(bound_samplers.size()); - for (const auto& [address, sampler] : bound_samplers) { - flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler}); - } - - std::vector<SeparateSamplerEntry> flat_separate_samplers; - flat_separate_samplers.reserve(separate_samplers.size()); - for (const auto& [key, sampler] : separate_samplers) { - SeparateSamplerEntry entry; - std::tie(entry.cbuf1, entry.cbuf2) = key.buffers; - std::tie(entry.offset1, entry.offset2) = key.offsets; - entry.sampler = sampler; - flat_separate_samplers.push_back(entry); - } - - std::vector<BindlessSamplerEntry> flat_bindless_samplers; - flat_bindless_samplers.reserve(bindless_samplers.size()); - for (const auto& [address, sampler] : bindless_samplers) { - flat_bindless_samplers.push_back( - BindlessSamplerEntry{address.first, address.second, sampler}); - } - - return file.Write(flat_keys) == flat_keys.size() && - file.Write(flat_bound_samplers) == flat_bound_samplers.size() && - file.Write(flat_separate_samplers) == flat_separate_samplers.size() && - file.Write(flat_bindless_samplers) == flat_bindless_samplers.size(); -} - -ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default; - -ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default; - -void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) { - title_id = title_id_; -} - -std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() { - // Skip games without title id - const bool has_title_id = title_id != 0; - if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) { - return std::nullopt; - } - - Common::FS::IOFile file{GetTransferablePath(), Common::FS::FileAccessMode::Read, - Common::FS::FileType::BinaryFile}; - if (!file.IsOpen()) { - LOG_INFO(Render_OpenGL, "No transferable shader cache 
found"); - is_usable = true; - return std::nullopt; - } - - u32 version{}; - if (!file.ReadObject(version)) { - LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it"); - return std::nullopt; - } - - if (version < NativeVersion) { - LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing"); - file.Close(); - InvalidateTransferable(); - is_usable = true; - return std::nullopt; - } - if (version > NativeVersion) { - LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version " - "of the emulator, skipping"); - return std::nullopt; - } - - // Version is valid, load the shaders - std::vector<ShaderDiskCacheEntry> entries; - while (static_cast<u64>(file.Tell()) < file.GetSize()) { - ShaderDiskCacheEntry& entry = entries.emplace_back(); - if (!entry.Load(file)) { - LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping"); - return std::nullopt; - } - } - - is_usable = true; - return {std::move(entries)}; -} - -std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() { - if (!is_usable) { - return {}; - } - - Common::FS::IOFile file{GetPrecompiledPath(), Common::FS::FileAccessMode::Read, - Common::FS::FileType::BinaryFile}; - if (!file.IsOpen()) { - LOG_INFO(Render_OpenGL, "No precompiled shader cache found"); - return {}; - } - - if (const auto result = LoadPrecompiledFile(file)) { - return *result; - } - - LOG_INFO(Render_OpenGL, "Failed to load precompiled cache"); - file.Close(); - InvalidatePrecompiled(); - return {}; -} - -std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile( - Common::FS::IOFile& file) { - // Read compressed file from disk and decompress to virtual precompiled cache file - std::vector<u8> compressed(file.GetSize()); - if (file.Read(compressed) != file.GetSize()) { - return std::nullopt; - } - const std::vector<u8> decompressed = Common::Compression::DecompressDataZSTD(compressed); - SaveArrayToPrecompiled(decompressed.data(), decompressed.size()); - precompiled_cache_virtual_file_offset = 0; - - ShaderCacheVersionHash file_hash{}; - if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) { - precompiled_cache_virtual_file_offset = 0; - return std::nullopt; - } - if (GetShaderCacheVersionHash() != file_hash) { - LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator"); - precompiled_cache_virtual_file_offset = 0; - return std::nullopt; - } - - std::vector<ShaderDiskCachePrecompiled> entries; - while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { - u32 binary_size; - auto& entry = entries.emplace_back(); - if (!LoadObjectFromPrecompiled(entry.unique_identifier) || - !LoadObjectFromPrecompiled(entry.binary_format) || - !LoadObjectFromPrecompiled(binary_size)) { - return std::nullopt; - } - - entry.binary.resize(binary_size); - if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) { - return std::nullopt; - } - } - return entries; -} - -void ShaderDiskCacheOpenGL::InvalidateTransferable() { - if (!Common::FS::RemoveFile(GetTransferablePath())) { - LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}", - Common::FS::PathToUTF8String(GetTransferablePath())); - } - InvalidatePrecompiled(); -} - -void ShaderDiskCacheOpenGL::InvalidatePrecompiled() { - // Clear virtaul precompiled cache file - precompiled_cache_virtual_file.Resize(0); - - if (!Common::FS::RemoveFile(GetPrecompiledPath())) { - LOG_ERROR(Render_OpenGL, 
"Failed to invalidate precompiled file={}", - Common::FS::PathToUTF8String(GetPrecompiledPath())); - } -} - -void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) { - if (!is_usable) { - return; - } - - const u64 id = entry.unique_identifier; - if (stored_transferable.contains(id)) { - // The shader already exists - return; - } - - Common::FS::IOFile file = AppendTransferableFile(); - if (!file.IsOpen()) { - return; - } - if (!entry.Save(file)) { - LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing"); - file.Close(); - InvalidateTransferable(); - return; - } - - stored_transferable.insert(id); -} - -void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) { - if (!is_usable) { - return; - } - - // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header - // when writing the dump. This should be done the moment I get access to write to the virtual - // file. - if (precompiled_cache_virtual_file.GetSize() == 0) { - SavePrecompiledHeaderToVirtualPrecompiledCache(); - } - - GLint binary_length; - glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); - - GLenum binary_format; - std::vector<u8> binary(binary_length); - glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); - - if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) || - !SaveObjectToPrecompiled(static_cast<u32>(binary.size())) || - !SaveArrayToPrecompiled(binary.data(), binary.size())) { - LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing", - unique_identifier); - InvalidatePrecompiled(); - } -} - -Common::FS::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const { - if (!EnsureDirectories()) { - return {}; - } - - const auto transferable_path{GetTransferablePath()}; - const bool existed = Common::FS::Exists(transferable_path); - - Common::FS::IOFile file{transferable_path, Common::FS::FileAccessMode::Append, - Common::FS::FileType::BinaryFile}; - if (!file.IsOpen()) { - LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}", - Common::FS::PathToUTF8String(transferable_path)); - return {}; - } - if (!existed || file.GetSize() == 0) { - // If the file didn't exist, write its version - if (!file.WriteObject(NativeVersion)) { - LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}", - Common::FS::PathToUTF8String(transferable_path)); - return {}; - } - } - return file; -} - -void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() { - const auto hash{GetShaderCacheVersionHash()}; - if (!SaveArrayToPrecompiled(hash.data(), hash.size())) { - LOG_ERROR( - Render_OpenGL, - "Failed to write precompiled cache version hash to virtual precompiled cache file"); - } -} - -void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { - precompiled_cache_virtual_file_offset = 0; - const std::vector<u8> uncompressed = precompiled_cache_virtual_file.ReadAllBytes(); - const std::vector<u8> compressed = - Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size()); - - const auto precompiled_path = GetPrecompiledPath(); - Common::FS::IOFile file{precompiled_path, Common::FS::FileAccessMode::Write, - Common::FS::FileType::BinaryFile}; - - if (!file.IsOpen()) { - LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", - Common::FS::PathToUTF8String(precompiled_path)); - return; - } - if (file.Write(compressed) != 
compressed.size()) { - LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}", - Common::FS::PathToUTF8String(precompiled_path)); - } -} - -bool ShaderDiskCacheOpenGL::EnsureDirectories() const { - const auto CreateDir = [](const std::filesystem::path& dir) { - if (!Common::FS::CreateDir(dir)) { - LOG_ERROR(Render_OpenGL, "Failed to create directory={}", - Common::FS::PathToUTF8String(dir)); - return false; - } - return true; - }; - - return CreateDir(Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)) && - CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) && - CreateDir(GetPrecompiledDir()); -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetTransferablePath() const { - return GetTransferableDir() / fmt::format("{}.bin", GetTitleID()); -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledPath() const { - return GetPrecompiledDir() / fmt::format("{}.bin", GetTitleID()); -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetTransferableDir() const { - return GetBaseDir() / "transferable"; -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledDir() const { - return GetBaseDir() / "precompiled"; -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetBaseDir() const { - return Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir) / "opengl"; -} - -std::string ShaderDiskCacheOpenGL::GetTitleID() const { - return fmt::format("{:016X}", title_id); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h deleted file mode 100644 index f8bc23868..000000000 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ /dev/null @@ -1,176 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <filesystem> -#include <optional> -#include <string> -#include <tuple> -#include <type_traits> -#include <unordered_map> -#include <unordered_set> -#include <utility> -#include <vector> - -#include <glad/glad.h> - -#include "common/assert.h" -#include "common/common_types.h" -#include "core/file_sys/vfs_vector.h" -#include "video_core/engines/shader_type.h" -#include "video_core/shader/registry.h" - -namespace Common::FS { -class IOFile; -} - -namespace OpenGL { - -using ProgramCode = std::vector<u64>; - -/// Describes a shader and how it's used by the guest GPU -struct ShaderDiskCacheEntry { - ShaderDiskCacheEntry(); - ~ShaderDiskCacheEntry(); - - bool Load(Common::FS::IOFile& file); - - bool Save(Common::FS::IOFile& file) const; - - bool HasProgramA() const { - return !code.empty() && !code_b.empty(); - } - - Tegra::Engines::ShaderType type{}; - ProgramCode code; - ProgramCode code_b; - - u64 unique_identifier = 0; - std::optional<u32> texture_handler_size; - u32 bound_buffer = 0; - VideoCommon::Shader::GraphicsInfo graphics_info; - VideoCommon::Shader::ComputeInfo compute_info; - VideoCommon::Shader::KeyMap keys; - VideoCommon::Shader::BoundSamplerMap bound_samplers; - VideoCommon::Shader::SeparateSamplerMap separate_samplers; - VideoCommon::Shader::BindlessSamplerMap bindless_samplers; -}; - -/// Contains an OpenGL dumped binary program -struct ShaderDiskCachePrecompiled { - u64 unique_identifier = 0; - GLenum binary_format = 0; - std::vector<u8> binary; -}; - -class ShaderDiskCacheOpenGL { -public: - explicit ShaderDiskCacheOpenGL(); - ~ShaderDiskCacheOpenGL(); - - /// Binds a title ID for all future operations. 
- void BindTitleID(u64 title_id); - - /// Loads transferable cache. If file has a old version or on failure, it deletes the file. - std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable(); - - /// Loads current game's precompiled cache. Invalidates on failure. - std::vector<ShaderDiskCachePrecompiled> LoadPrecompiled(); - - /// Removes the transferable (and precompiled) cache file. - void InvalidateTransferable(); - - /// Removes the precompiled cache file and clears virtual precompiled cache file. - void InvalidatePrecompiled(); - - /// Saves a raw dump to the transferable file. Checks for collisions. - void SaveEntry(const ShaderDiskCacheEntry& entry); - - /// Saves a dump entry to the precompiled file. Does not check for collisions. - void SavePrecompiled(u64 unique_identifier, GLuint program); - - /// Serializes virtual precompiled shader cache file to real file - void SaveVirtualPrecompiledFile(); - -private: - /// Loads the transferable cache. Returns empty on failure. - std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile( - Common::FS::IOFile& file); - - /// Opens current game's transferable file and write it's header if it doesn't exist - Common::FS::IOFile AppendTransferableFile() const; - - /// Save precompiled header to precompiled_cache_in_memory - void SavePrecompiledHeaderToVirtualPrecompiledCache(); - - /// Create shader disk cache directories. Returns true on success. - bool EnsureDirectories() const; - - /// Gets current game's transferable file path - std::filesystem::path GetTransferablePath() const; - - /// Gets current game's precompiled file path - std::filesystem::path GetPrecompiledPath() const; - - /// Get user's transferable directory path - std::filesystem::path GetTransferableDir() const; - - /// Get user's precompiled directory path - std::filesystem::path GetPrecompiledDir() const; - - /// Get user's shader directory path - std::filesystem::path GetBaseDir() const; - - /// Get current game's title id - std::string GetTitleID() const; - - template <typename T> - bool SaveArrayToPrecompiled(const T* data, std::size_t length) { - const std::size_t write_length = precompiled_cache_virtual_file.WriteArray( - data, length, precompiled_cache_virtual_file_offset); - precompiled_cache_virtual_file_offset += write_length; - return write_length == sizeof(T) * length; - } - - template <typename T> - bool LoadArrayFromPrecompiled(T* data, std::size_t length) { - const std::size_t read_length = precompiled_cache_virtual_file.ReadArray( - data, length, precompiled_cache_virtual_file_offset); - precompiled_cache_virtual_file_offset += read_length; - return read_length == sizeof(T) * length; - } - - template <typename T> - bool SaveObjectToPrecompiled(const T& object) { - return SaveArrayToPrecompiled(&object, 1); - } - - bool SaveObjectToPrecompiled(bool object) { - const auto value = static_cast<u8>(object); - return SaveArrayToPrecompiled(&value, 1); - } - - template <typename T> - bool LoadObjectFromPrecompiled(T& object) { - return LoadArrayFromPrecompiled(&object, 1); - } - - // Stores whole precompiled cache which will be read from or saved to the precompiled chache - // file - FileSys::VectorVfsFile precompiled_cache_virtual_file; - // Stores the current offset of the precompiled cache file for IO purposes - std::size_t precompiled_cache_virtual_file_offset = 0; - - // Stored transferable shaders - std::unordered_set<u64> stored_transferable; - - /// Title ID to operate on - u64 title_id = 0; - - // The cache has been loaded at boot - 
bool is_usable = false; -}; - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 553e6e8d6..399959afb 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -1,149 +1,3 @@ // Copyright 2018 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. - -#include <glad/glad.h> - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_opengl/gl_device.h" -#include "video_core/renderer_opengl/gl_shader_manager.h" - -namespace OpenGL { - -namespace { - -void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) { - if (current == old) { - return; - } - if (current == 0) { - if (enabled) { - enabled = false; - glDisable(stage); - } - return; - } - if (!enabled) { - enabled = true; - glEnable(stage); - } - glBindProgramARB(stage, current); -} - -} // Anonymous namespace - -ProgramManager::ProgramManager(const Device& device) - : use_assembly_programs{device.UseAssemblyShaders()} { - if (use_assembly_programs) { - glEnable(GL_COMPUTE_PROGRAM_NV); - } else { - graphics_pipeline.Create(); - glBindProgramPipeline(graphics_pipeline.handle); - } -} - -ProgramManager::~ProgramManager() = default; - -void ProgramManager::BindCompute(GLuint program) { - if (use_assembly_programs) { - glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); - } else { - is_graphics_bound = false; - glUseProgram(program); - } -} - -void ProgramManager::BindGraphicsPipeline() { - if (!use_assembly_programs) { - UpdateSourcePrograms(); - } -} - -void ProgramManager::BindHostPipeline(GLuint pipeline) { - if (use_assembly_programs) { - if (geometry_enabled) { - geometry_enabled = false; - old_state.geometry = 0; - glDisable(GL_GEOMETRY_PROGRAM_NV); - } - } else { - if (!is_graphics_bound) { - glUseProgram(0); - } - } - glBindProgramPipeline(pipeline); -} - -void ProgramManager::RestoreGuestPipeline() { - if (use_assembly_programs) { - glBindProgramPipeline(0); - } else { - glBindProgramPipeline(graphics_pipeline.handle); - } -} - -void ProgramManager::BindHostCompute(GLuint program) { - if (use_assembly_programs) { - glDisable(GL_COMPUTE_PROGRAM_NV); - } - glUseProgram(program); - is_graphics_bound = false; -} - -void ProgramManager::RestoreGuestCompute() { - if (use_assembly_programs) { - glEnable(GL_COMPUTE_PROGRAM_NV); - glUseProgram(0); - } -} - -void ProgramManager::UseVertexShader(GLuint program) { - if (use_assembly_programs) { - BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled); - } - current_state.vertex = program; -} - -void ProgramManager::UseGeometryShader(GLuint program) { - if (use_assembly_programs) { - BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.vertex, geometry_enabled); - } - current_state.geometry = program; -} - -void ProgramManager::UseFragmentShader(GLuint program) { - if (use_assembly_programs) { - BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.vertex, fragment_enabled); - } - current_state.fragment = program; -} - -void ProgramManager::UpdateSourcePrograms() { - if (!is_graphics_bound) { - is_graphics_bound = true; - glUseProgram(0); - } - - const GLuint handle = graphics_pipeline.handle; - const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) { - if (current == old) { - return; - } - glUseProgramStages(handle, stage, current); - }; - 
update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, old_state.vertex); - update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry); - update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment); - - old_state = current_state; -} - -void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) { - const auto& regs = maxwell.regs; - - // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value. - y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f; -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index ad42cce74..d7ef0775d 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -4,79 +4,142 @@ #pragma once -#include <cstddef> +#include <array> +#include <span> #include <glad/glad.h> +#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/maxwell_to_gl.h" namespace OpenGL { -class Device; - -/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned -/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at -/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. -/// Not following that rule will cause problems on some AMD drivers. -struct alignas(16) MaxwellUniformData { - void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell); - - GLfloat y_direction; -}; -static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect"); -static_assert(sizeof(MaxwellUniformData) < 16384, - "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); - class ProgramManager { -public: - explicit ProgramManager(const Device& device); - ~ProgramManager(); - - /// Binds a compute program - void BindCompute(GLuint program); - - /// Updates bound programs. - void BindGraphicsPipeline(); - - /// Binds an OpenGL pipeline object unsynchronized with the guest state. - void BindHostPipeline(GLuint pipeline); - - /// Rewinds BindHostPipeline state changes. - void RestoreGuestPipeline(); - - /// Binds an OpenGL GLSL program object unsynchronized with the guest state. - void BindHostCompute(GLuint program); + static constexpr size_t NUM_STAGES = 5; - /// Rewinds BindHostCompute state changes. - void RestoreGuestCompute(); - - void UseVertexShader(GLuint program); - void UseGeometryShader(GLuint program); - void UseFragmentShader(GLuint program); - -private: - struct PipelineState { - GLuint vertex = 0; - GLuint geometry = 0; - GLuint fragment = 0; + static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{ + GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, + GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, }; - /// Update GLSL programs. 
- void UpdateSourcePrograms(); - - OGLPipeline graphics_pipeline; - - PipelineState current_state; - PipelineState old_state; - - bool use_assembly_programs = false; - - bool is_graphics_bound = true; +public: + explicit ProgramManager(const Device& device) { + glCreateProgramPipelines(1, &pipeline.handle); + if (device.UseAssemblyShaders()) { + glEnable(GL_COMPUTE_PROGRAM_NV); + } + } + + void BindComputeProgram(GLuint program) { + glUseProgram(program); + is_compute_bound = true; + } + + void BindComputeAssemblyProgram(GLuint program) { + if (current_assembly_compute_program != program) { + current_assembly_compute_program = program; + glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); + } + UnbindPipeline(); + } + + void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) { + static constexpr std::array<GLenum, 5> stage_enums{ + GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT, + GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT, + }; + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (current_programs[stage] != programs[stage].handle) { + current_programs[stage] = programs[stage].handle; + glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle); + } + } + BindPipeline(); + } + + void BindPresentPrograms(GLuint vertex, GLuint fragment) { + if (current_programs[0] != vertex) { + current_programs[0] = vertex; + glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex); + } + if (current_programs[4] != fragment) { + current_programs[4] = fragment; + glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment); + } + glUseProgramStages( + pipeline.handle, + GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0); + current_programs[1] = 0; + current_programs[2] = 0; + current_programs[3] = 0; + + if (current_stage_mask != 0) { + current_stage_mask = 0; + for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) { + glDisable(program_type); + } + } + BindPipeline(); + } + + void BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs, + u32 stage_mask) { + const u32 changed_mask = current_stage_mask ^ stage_mask; + current_stage_mask = stage_mask; + + if (changed_mask != 0) { + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (((changed_mask >> stage) & 1) != 0) { + if (((stage_mask >> stage) & 1) != 0) { + glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]); + } else { + glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]); + } + } + } + } + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (current_programs[stage] != programs[stage].handle) { + current_programs[stage] = programs[stage].handle; + glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle); + } + } + UnbindPipeline(); + } + + void RestoreGuestCompute() {} - bool vertex_enabled = false; - bool geometry_enabled = false; - bool fragment_enabled = false; +private: + void BindPipeline() { + if (!is_pipeline_bound) { + is_pipeline_bound = true; + glBindProgramPipeline(pipeline.handle); + } + UnbindCompute(); + } + + void UnbindPipeline() { + if (is_pipeline_bound) { + is_pipeline_bound = false; + glBindProgramPipeline(0); + } + UnbindCompute(); + } + + void UnbindCompute() { + if (is_compute_bound) { + is_compute_bound = false; + glUseProgram(0); + } + } + + OGLPipeline pipeline; + bool is_pipeline_bound{}; + bool is_compute_bound{}; + + u32 current_stage_mask = 0; + std::array<GLuint, NUM_STAGES> current_programs{}; + GLuint current_assembly_compute_program = 0; }; } // 
namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index 4bf0d6090..d432072ad 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -5,57 +5,108 @@ #include <string_view> #include <vector> #include <glad/glad.h> + #include "common/assert.h" #include "common/logging/log.h" +#include "common/settings.h" #include "video_core/renderer_opengl/gl_shader_util.h" -namespace OpenGL::GLShader { +namespace OpenGL { -namespace { +static OGLProgram LinkSeparableProgram(GLuint shader) { + OGLProgram program; + program.handle = glCreateProgram(); + glProgramParameteri(program.handle, GL_PROGRAM_SEPARABLE, GL_TRUE); + glAttachShader(program.handle, shader); + glLinkProgram(program.handle); + if (!Settings::values.renderer_debug) { + return program; + } + GLint link_status{}; + glGetProgramiv(program.handle, GL_LINK_STATUS, &link_status); -std::string_view StageDebugName(GLenum type) { - switch (type) { - case GL_VERTEX_SHADER: - return "vertex"; - case GL_GEOMETRY_SHADER: - return "geometry"; - case GL_FRAGMENT_SHADER: - return "fragment"; - case GL_COMPUTE_SHADER: - return "compute"; + GLint log_length{}; + glGetProgramiv(program.handle, GL_INFO_LOG_LENGTH, &log_length); + if (log_length == 0) { + return program; + } + std::string log(log_length, 0); + glGetProgramInfoLog(program.handle, log_length, nullptr, log.data()); + if (link_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "{}", log); + } else { + LOG_WARNING(Render_OpenGL, "{}", log); } - UNIMPLEMENTED(); - return "unknown"; + return program; } -} // Anonymous namespace +static void LogShader(GLuint shader, std::string_view code = {}) { + GLint shader_status{}; + glGetShaderiv(shader, GL_COMPILE_STATUS, &shader_status); + if (shader_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "Failed to build shader"); + } + GLint log_length{}; + glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length); + if (log_length == 0) { + return; + } + std::string log(log_length, 0); + glGetShaderInfoLog(shader, log_length, nullptr, log.data()); + if (shader_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "{}", log); + if (!code.empty()) { + LOG_INFO(Render_OpenGL, "\n{}", code); + } + } else { + LOG_WARNING(Render_OpenGL, "{}", log); + } +} -GLuint LoadShader(std::string_view source, GLenum type) { - const std::string_view debug_type = StageDebugName(type); - const GLuint shader_id = glCreateShader(type); +OGLProgram CreateProgram(std::string_view code, GLenum stage) { + OGLShader shader; + shader.handle = glCreateShader(stage); - const GLchar* source_string = source.data(); - const GLint source_length = static_cast<GLint>(source.size()); + const GLint length = static_cast<GLint>(code.size()); + const GLchar* const code_ptr = code.data(); + glShaderSource(shader.handle, 1, &code_ptr, &length); + glCompileShader(shader.handle); + if (Settings::values.renderer_debug) { + LogShader(shader.handle, code); + } + return LinkSeparableProgram(shader.handle); +} - glShaderSource(shader_id, 1, &source_string, &source_length); - LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); - glCompileShader(shader_id); +OGLProgram CreateProgram(std::span<const u32> code, GLenum stage) { + OGLShader shader; + shader.handle = glCreateShader(stage); - GLint result = GL_FALSE; - GLint info_log_length; - glGetShaderiv(shader_id, GL_COMPILE_STATUS, &result); - glGetShaderiv(shader_id, GL_INFO_LOG_LENGTH, &info_log_length); + 
glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(), + static_cast<GLsizei>(code.size_bytes())); + glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr); + if (Settings::values.renderer_debug) { + LogShader(shader.handle); + } + return LinkSeparableProgram(shader.handle); +} - if (info_log_length > 1) { - std::string shader_error(info_log_length, ' '); - glGetShaderInfoLog(shader_id, info_log_length, nullptr, &shader_error[0]); - if (result == GL_TRUE) { - LOG_DEBUG(Render_OpenGL, "{}", shader_error); - } else { - LOG_ERROR(Render_OpenGL, "Error compiling {} shader:\n{}", debug_type, shader_error); +OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) { + OGLAssemblyProgram program; + glGenProgramsARB(1, &program.handle); + glNamedProgramStringEXT(program.handle, target, GL_PROGRAM_FORMAT_ASCII_ARB, + static_cast<GLsizei>(code.size()), code.data()); + if (Settings::values.renderer_debug) { + const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV)); + if (err && *err) { + if (std::strstr(err, "error")) { + LOG_CRITICAL(Render_OpenGL, "\n{}", err); + LOG_INFO(Render_OpenGL, "\n{}", code); + } else { + LOG_WARNING(Render_OpenGL, "\n{}", err); + } } } - return shader_id; + return program; } -} // namespace OpenGL::GLShader +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index 1b770532e..4e1a2a8e1 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h @@ -4,92 +4,23 @@ #pragma once +#include <span> #include <string> +#include <string_view> #include <vector> + #include <glad/glad.h> + #include "common/assert.h" #include "common/logging/log.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" -namespace OpenGL::GLShader { - -/** - * Utility function to log the source code of a list of shaders. - * @param shaders The OpenGL shaders whose source we will print. - */ -template <typename... T> -void LogShaderSource(T... shaders) { - auto shader_list = {shaders...}; - - for (const auto& shader : shader_list) { - if (shader == 0) - continue; - - GLint source_length; - glGetShaderiv(shader, GL_SHADER_SOURCE_LENGTH, &source_length); - - std::string source(source_length, ' '); - glGetShaderSource(shader, source_length, nullptr, &source[0]); - LOG_INFO(Render_OpenGL, "Shader source {}", source); - } -} - -/** - * Utility function to create and compile an OpenGL GLSL shader - * @param source String of the GLSL shader program - * @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER) - */ -GLuint LoadShader(std::string_view source, GLenum type); - -/** - * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader) - * @param separable_program whether to create a separable program - * @param shaders ID of shaders to attach to the program - * @returns Handle of the newly created OpenGL program object - */ -template <typename... T> -GLuint LoadProgram(bool separable_program, bool hint_retrievable, T... shaders) { - // Link the program - LOG_DEBUG(Render_OpenGL, "Linking program..."); - - GLuint program_id = glCreateProgram(); - - ((shaders == 0 ? 
(void)0 : glAttachShader(program_id, shaders)), ...); - - if (separable_program) { - glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE); - } - if (hint_retrievable) { - glProgramParameteri(program_id, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE); - } - - glLinkProgram(program_id); - - // Check the program - GLint result = GL_FALSE; - GLint info_log_length; - glGetProgramiv(program_id, GL_LINK_STATUS, &result); - glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length); - - if (info_log_length > 1) { - std::string program_error(info_log_length, ' '); - glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]); - if (result == GL_TRUE) { - LOG_DEBUG(Render_OpenGL, "{}", program_error); - } else { - LOG_ERROR(Render_OpenGL, "Error linking shader:\n{}", program_error); - } - } - - if (result == GL_FALSE) { - // There was a problem linking the shader, print the source for debugging purposes. - LogShaderSource(shaders...); - } +namespace OpenGL { - ASSERT_MSG(result == GL_TRUE, "Shader not linked"); +OGLProgram CreateProgram(std::string_view code, GLenum stage); - ((shaders == 0 ? (void)0 : glDetachShader(program_id, shaders)), ...); +OGLProgram CreateProgram(std::span<const u32> code, GLenum stage); - return program_id; -} +OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target); -} // namespace OpenGL::GLShader +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp index dbdf5230f..586da84e3 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.cpp +++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp @@ -83,11 +83,6 @@ void SetupDirtyScissors(Tables& tables) { FillBlock(tables[1], OFF(scissor_test), NUM(scissor_test), Scissors); } -void SetupDirtyShaders(Tables& tables) { - FillBlock(tables[0], OFF(shader_config[0]), NUM(shader_config[0]) * Regs::MaxShaderProgram, - Shaders); -} - void SetupDirtyPolygonModes(Tables& tables) { tables[0][OFF(polygon_mode_front)] = PolygonModeFront; tables[0][OFF(polygon_mode_back)] = PolygonModeBack; @@ -217,7 +212,6 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} SetupDirtyScissors(tables); SetupDirtyVertexInstances(tables); SetupDirtyVertexFormat(tables); - SetupDirtyShaders(tables); SetupDirtyPolygonModes(tables); SetupDirtyDepthTest(tables); SetupDirtyStencilTest(tables); diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h index 94c905116..5864c7c07 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.h +++ b/src/video_core/renderer_opengl/gl_state_tracker.h @@ -52,7 +52,6 @@ enum : u8 { BlendState0, BlendState7 = BlendState0 + 7, - Shaders, ClipDistances, PolygonModes, diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index ff0f03e99..c373c9cb4 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -24,9 +24,7 @@ #include "video_core/textures/decoders.h" namespace OpenGL { - namespace { - using Tegra::Texture::SwizzleSource; using Tegra::Texture::TextureMipmapFilter; using Tegra::Texture::TextureType; @@ -59,107 +57,6 @@ struct CopyRegion { GLsizei depth; }; -struct FormatTuple { - GLenum internal_format; - GLenum format = GL_NONE; - GLenum type = GL_NONE; -}; - -constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{ - {GL_RGBA8, GL_RGBA, 
GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM - {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM - {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT - {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM - {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM - {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM - {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT - {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM - {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM - {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM - {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT - {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT - {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT - {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM - {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM - {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT - {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT - {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT - {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT - {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM - {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM - {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM - {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM - {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM - {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM - {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM - {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM - {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT - {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT - {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM - {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT - {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT - {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT - {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT - {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT - {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT - {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM - {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM - {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT - {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT - {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM - {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT - {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT - {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT - {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM - {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB - {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM - {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM - {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT - {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT - {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT - {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT - {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT - {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT - {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM - {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM - {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM - {GL_SRGB8_ALPHA8, GL_RGBA, 
GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB - {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB - {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB - {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB - {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB - {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB - {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT - {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT - {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM - {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, - GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT -}}; - constexpr std::array ACCELERATED_FORMATS{ GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F, GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI, @@ -170,11 +67,6 @@ constexpr std::array ACCELERATED_FORMATS{ GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM, }; -const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { - ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size()); - return FORMAT_TABLE[static_cast<size_t>(pixel_format)]; -} - GLenum ImageTarget(const VideoCommon::ImageInfo& info) { switch (info.type) { case ImageType::e1D: @@ -195,26 +87,24 @@ GLenum ImageTarget(const VideoCommon::ImageInfo& info) { return GL_NONE; } -GLenum ImageTarget(ImageViewType type, int num_samples = 1) { +GLenum ImageTarget(Shader::TextureType type, int num_samples = 1) { const bool is_multisampled = num_samples > 1; switch (type) { - case ImageViewType::e1D: + case Shader::TextureType::Color1D: return GL_TEXTURE_1D; - case ImageViewType::e2D: + case Shader::TextureType::Color2D: return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D; - case ImageViewType::Cube: + case Shader::TextureType::ColorCube: return GL_TEXTURE_CUBE_MAP; - case ImageViewType::e3D: + case Shader::TextureType::Color3D: return GL_TEXTURE_3D; - case ImageViewType::e1DArray: + case Shader::TextureType::ColorArray1D: return GL_TEXTURE_1D_ARRAY; - case ImageViewType::e2DArray: + case Shader::TextureType::ColorArray2D: return is_multisampled ? 
GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY; - case ImageViewType::CubeArray: + case Shader::TextureType::ColorArrayCube: return GL_TEXTURE_CUBE_MAP_ARRAY; - case ImageViewType::Rect: - return GL_TEXTURE_RECTANGLE; - case ImageViewType::Buffer: + case Shader::TextureType::Buffer: return GL_TEXTURE_BUFFER; } UNREACHABLE_MSG("Invalid image view type={}", type); @@ -322,7 +212,7 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4 default: return false; } - const GLenum internal_format = GetFormatTuple(info.format).internal_format; + const GLenum internal_format = MaxwellToGL::GetFormatTuple(info.format).internal_format; const auto& format_info = runtime.FormatInfo(info.type, internal_format); if (format_info.is_compressed) { return false; @@ -414,11 +304,10 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4 void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) { - const GLuint texture = image_view->DefaultHandle(); - glNamedFramebufferTexture(fbo, attachment, texture, 0); + glNamedFramebufferTexture(fbo, attachment, image_view->DefaultHandle(), 0); return; } - const GLuint texture = image_view->Handle(ImageViewType::e3D); + const GLuint texture = image_view->Handle(Shader::TextureType::Color3D); if (image_view->range.extent.layers > 1) { // TODO: OpenGL doesn't support rendering to a fixed number of slices glNamedFramebufferTexture(fbo, attachment, texture, 0); @@ -439,6 +328,28 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { } } +[[nodiscard]] GLenum ShaderFormat(Shader::ImageFormat format) { + switch (format) { + case Shader::ImageFormat::Typeless: + break; + case Shader::ImageFormat::R8_SINT: + return GL_R8I; + case Shader::ImageFormat::R8_UINT: + return GL_R8UI; + case Shader::ImageFormat::R16_UINT: + return GL_R16UI; + case Shader::ImageFormat::R16_SINT: + return GL_R16I; + case Shader::ImageFormat::R32_UINT: + return GL_R32UI; + case Shader::ImageFormat::R32G32_UINT: + return GL_RG32UI; + case Shader::ImageFormat::R32G32B32A32_UINT: + return GL_RGBA32UI; + } + UNREACHABLE_MSG("Invalid image format={}", format); + return GL_R32UI; +} } // Anonymous namespace ImageBufferMap::~ImageBufferMap() { @@ -453,7 +364,7 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; for (size_t i = 0; i < TARGETS.size(); ++i) { const GLenum target = TARGETS[i]; - for (const FormatTuple& tuple : FORMAT_TABLE) { + for (const MaxwellToGL::FormatTuple& tuple : MaxwellToGL::FORMAT_TABLE) { const GLenum format = tuple.internal_format; GLint compat_class; GLint compat_type; @@ -475,11 +386,9 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY); null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY); null_image_3d.Create(GL_TEXTURE_3D); - null_image_rect.Create(GL_TEXTURE_RECTANGLE); glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1); glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6); glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1); - glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1); std::array<GLuint, 4> new_handles; glGenTextures(static_cast<GLsizei>(new_handles.size()), new_handles.data()); @@ -496,29 +405,28 @@ TextureCacheRuntime::TextureCacheRuntime(const 
Device& device_, ProgramManager& glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle, GL_R8, 0, 1, 0, 6); const std::array texture_handles{ - null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle, - null_image_rect.handle, null_image_view_1d.handle, null_image_view_2d.handle, - null_image_view_2d_array.handle, null_image_view_cube.handle, + null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle, + null_image_view_1d.handle, null_image_view_2d.handle, null_image_view_2d_array.handle, + null_image_view_cube.handle, }; for (const GLuint handle : texture_handles) { static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO}; glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data()); } - const auto set_view = [this](ImageViewType type, GLuint handle) { + const auto set_view = [this](Shader::TextureType type, GLuint handle) { if (device.HasDebuggingToolAttached()) { const std::string name = fmt::format("NullImage {}", type); glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data()); } null_image_views[static_cast<size_t>(type)] = handle; }; - set_view(ImageViewType::e1D, null_image_view_1d.handle); - set_view(ImageViewType::e2D, null_image_view_2d.handle); - set_view(ImageViewType::Cube, null_image_view_cube.handle); - set_view(ImageViewType::e3D, null_image_3d.handle); - set_view(ImageViewType::e1DArray, null_image_1d_array.handle); - set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle); - set_view(ImageViewType::CubeArray, null_image_cube_array.handle); - set_view(ImageViewType::Rect, null_image_rect.handle); + set_view(Shader::TextureType::Color1D, null_image_view_1d.handle); + set_view(Shader::TextureType::Color2D, null_image_view_2d.handle); + set_view(Shader::TextureType::ColorCube, null_image_view_cube.handle); + set_view(Shader::TextureType::Color3D, null_image_3d.handle); + set_view(Shader::TextureType::ColorArray1D, null_image_1d_array.handle); + set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle); + set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle); } TextureCacheRuntime::~TextureCacheRuntime() = default; @@ -710,7 +618,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, gl_format = GL_RGBA; gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; } else { - const auto& tuple = GetFormatTuple(info.format); + const auto& tuple = MaxwellToGL::GetFormatTuple(info.format); gl_internal_format = tuple.internal_format; gl_format = tuple.format; gl_type = tuple.type; @@ -750,8 +658,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth); break; case GL_TEXTURE_BUFFER: - buffer.Create(); - glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0); + UNREACHABLE(); break; default: UNREACHABLE_MSG("Invalid target=0x{:x}", target); @@ -789,14 +696,6 @@ void Image::UploadMemory(const ImageBufferMap& map, } } -void Image::UploadMemory(const ImageBufferMap& map, - std::span<const VideoCommon::BufferCopy> copies) { - for (const VideoCommon::BufferCopy& copy : copies) { - glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset, - copy.dst_offset, copy.size); - } -} - void Image::DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies) { glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to 
its own API @@ -958,23 +857,30 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI if (True(image.flags & ImageFlagBits::Converted)) { internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; } else { - internal_format = GetFormatTuple(format).internal_format; + internal_format = MaxwellToGL::GetFormatTuple(format).internal_format; + } + full_range = info.range; + flat_range = info.range; + set_object_label = device.HasDebuggingToolAttached(); + is_render_target = info.IsRenderTarget(); + original_texture = image.texture.handle; + num_samples = image.info.num_samples; + if (!is_render_target) { + swizzle[0] = info.x_source; + swizzle[1] = info.y_source; + swizzle[2] = info.z_source; + swizzle[3] = info.w_source; } - VideoCommon::SubresourceRange flatten_range = info.range; - std::array<GLuint, 2> handles; - stored_views.reserve(2); - switch (info.type) { case ImageViewType::e1DArray: - flatten_range.extent.layers = 1; + flat_range.extent.layers = 1; [[fallthrough]]; case ImageViewType::e1D: - glGenTextures(2, handles.data()); - SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range); - SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range); + SetupView(Shader::TextureType::Color1D); + SetupView(Shader::TextureType::ColorArray1D); break; case ImageViewType::e2DArray: - flatten_range.extent.layers = 1; + flat_range.extent.layers = 1; [[fallthrough]]; case ImageViewType::e2D: if (True(flags & VideoCommon::ImageViewFlagBits::Slice)) { @@ -984,63 +890,126 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI .base = {.level = info.range.base.level, .layer = 0}, .extent = {.levels = 1, .layers = 1}, }; - glGenTextures(1, handles.data()); - SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range); - break; + full_range = slice_range; + + SetupView(Shader::TextureType::Color3D); + } else { + SetupView(Shader::TextureType::Color2D); + SetupView(Shader::TextureType::ColorArray2D); } - glGenTextures(2, handles.data()); - SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range); - SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range); break; case ImageViewType::e3D: - glGenTextures(1, handles.data()); - SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range); + SetupView(Shader::TextureType::Color3D); break; case ImageViewType::CubeArray: - flatten_range.extent.layers = 6; + flat_range.extent.layers = 6; [[fallthrough]]; case ImageViewType::Cube: - glGenTextures(2, handles.data()); - SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range); - SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range); + SetupView(Shader::TextureType::ColorCube); + SetupView(Shader::TextureType::ColorArrayCube); break; case ImageViewType::Rect: - glGenTextures(1, handles.data()); - SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range); + UNIMPLEMENTED(); break; case ImageViewType::Buffer: - glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data()); - SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range); + UNREACHABLE(); + break; + } + switch (info.type) { + case ImageViewType::e1D: + default_handle = Handle(Shader::TextureType::Color1D); + break; + case ImageViewType::e1DArray: + default_handle = Handle(Shader::TextureType::ColorArray1D); + break; + case ImageViewType::e2D: + default_handle = 
Handle(Shader::TextureType::Color2D); + break; + case ImageViewType::e2DArray: + default_handle = Handle(Shader::TextureType::ColorArray2D); + break; + case ImageViewType::e3D: + default_handle = Handle(Shader::TextureType::Color3D); + break; + case ImageViewType::Cube: + default_handle = Handle(Shader::TextureType::ColorCube); + break; + case ImageViewType::CubeArray: + default_handle = Handle(Shader::TextureType::ColorArrayCube); + break; + default: break; } - default_handle = Handle(info.type); } +ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, + const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_) + : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_}, + buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} + +ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, + const VideoCommon::ImageViewInfo& view_info) + : VideoCommon::ImageViewBase{info, view_info} {} + ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} -void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type, - GLuint handle, const VideoCommon::ImageViewInfo& info, - VideoCommon::SubresourceRange view_range) { - if (info.type == ImageViewType::Buffer) { - // TODO: Take offset from buffer cache - glTextureBufferRange(handle, internal_format, image.buffer.handle, 0, - image.guest_size_bytes); - } else { - const GLuint parent = image.texture.handle; - const GLenum target = ImageTarget(view_type, image.info.num_samples); - glTextureView(handle, target, parent, internal_format, view_range.base.level, - view_range.extent.levels, view_range.base.layer, view_range.extent.layers); - if (!info.IsRenderTarget()) { - ApplySwizzle(handle, format, info.Swizzle()); - } +GLuint ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format) { + if (image_format == Shader::ImageFormat::Typeless) { + return Handle(texture_type); + } + const bool is_signed{image_format == Shader::ImageFormat::R8_SINT || + image_format == Shader::ImageFormat::R16_SINT}; + if (!storage_views) { + storage_views = std::make_unique<StorageViews>(); + } + auto& type_views{is_signed ? 
storage_views->signeds : storage_views->unsigneds}; + GLuint& view{type_views[static_cast<size_t>(texture_type)]}; + if (view == 0) { + view = MakeView(texture_type, ShaderFormat(image_format)); + } + return view; +} + +void ImageView::SetupView(Shader::TextureType view_type) { + views[static_cast<size_t>(view_type)] = MakeView(view_type, internal_format); +} + +GLuint ImageView::MakeView(Shader::TextureType view_type, GLenum view_format) { + VideoCommon::SubresourceRange view_range; + switch (view_type) { + case Shader::TextureType::Color1D: + case Shader::TextureType::Color2D: + case Shader::TextureType::ColorCube: + view_range = flat_range; + break; + case Shader::TextureType::ColorArray1D: + case Shader::TextureType::ColorArray2D: + case Shader::TextureType::Color3D: + case Shader::TextureType::ColorArrayCube: + view_range = full_range; + break; + default: + UNREACHABLE(); } - if (device.HasDebuggingToolAttached()) { - const std::string name = VideoCommon::Name(*this, view_type); - glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data()); + OGLTextureView& view = stored_views.emplace_back(); + view.Create(); + + const GLenum target = ImageTarget(view_type, num_samples); + glTextureView(view.handle, target, original_texture, view_format, view_range.base.level, + view_range.extent.levels, view_range.base.layer, view_range.extent.layers); + if (!is_render_target) { + std::array<SwizzleSource, 4> casted_swizzle; + std::ranges::transform(swizzle, casted_swizzle.begin(), [](u8 component_swizzle) { + return static_cast<SwizzleSource>(component_swizzle); + }); + ApplySwizzle(view.handle, format, casted_swizzle); + } + if (set_object_label) { + const std::string name = VideoCommon::Name(*this); + glObjectLabel(GL_TEXTURE, view.handle, static_cast<GLsizei>(name.size()), name.data()); } - stored_views.emplace_back().handle = handle; - views[static_cast<size_t>(view_type)] = handle; + return view.handle; } Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index cf3b789e3..921072ebe 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -9,6 +9,7 @@ #include <glad/glad.h> +#include "shader_recompiler/shader_info.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/util_shaders.h" #include "video_core/texture_cache/texture_cache.h" @@ -127,13 +128,12 @@ private: OGLTexture null_image_1d_array; OGLTexture null_image_cube_array; OGLTexture null_image_3d; - OGLTexture null_image_rect; OGLTextureView null_image_view_1d; OGLTextureView null_image_view_2d; OGLTextureView null_image_view_2d_array; OGLTextureView null_image_view_cube; - std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> null_image_views; + std::array<GLuint, Shader::NUM_TEXTURE_TYPES> null_image_views{}; }; class Image : public VideoCommon::ImageBase { @@ -154,8 +154,6 @@ public: void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); - void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferCopy> copies); - void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); GLuint StorageHandle() noexcept; @@ -170,7 +168,6 @@ private: void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); OGLTexture texture; - OGLBuffer buffer; 
OGLTextureView store_view; GLenum gl_internal_format = GL_NONE; GLenum gl_format = GL_NONE; @@ -182,10 +179,17 @@ class ImageView : public VideoCommon::ImageViewBase { public: explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&, + const VideoCommon::ImageViewInfo&, GPUVAddr); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, + const VideoCommon::ImageViewInfo& view_info); explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); - [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept { - return views[static_cast<size_t>(query_type)]; + [[nodiscard]] GLuint StorageView(Shader::TextureType texture_type, + Shader::ImageFormat image_format); + + [[nodiscard]] GLuint Handle(Shader::TextureType handle_type) const noexcept { + return views[static_cast<size_t>(handle_type)]; } [[nodiscard]] GLuint DefaultHandle() const noexcept { @@ -196,15 +200,38 @@ public: return internal_format; } + [[nodiscard]] GPUVAddr GpuAddr() const noexcept { + return gpu_addr; + } + + [[nodiscard]] u32 BufferSize() const noexcept { + return buffer_size; + } + private: - void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle, - const VideoCommon::ImageViewInfo& info, - VideoCommon::SubresourceRange view_range); + struct StorageViews { + std::array<GLuint, Shader::NUM_TEXTURE_TYPES> signeds{}; + std::array<GLuint, Shader::NUM_TEXTURE_TYPES> unsigneds{}; + }; + + void SetupView(Shader::TextureType view_type); + + GLuint MakeView(Shader::TextureType view_type, GLenum view_format); - std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> views{}; + std::array<GLuint, Shader::NUM_TEXTURE_TYPES> views{}; std::vector<OGLTextureView> stored_views; - GLuint default_handle = 0; + std::unique_ptr<StorageViews> storage_views; GLenum internal_format = GL_NONE; + GLuint default_handle = 0; + GPUVAddr gpu_addr = 0; + u32 buffer_size = 0; + GLuint original_texture = 0; + int num_samples = 0; + VideoCommon::SubresourceRange flat_range; + VideoCommon::SubresourceRange full_range; + std::array<u8, 4> swizzle{}; + bool set_object_label = false; + bool is_render_target = false; }; class ImageAlloc : public VideoCommon::ImageAllocBase {}; diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index f7ad8f370..672f94bfc 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -5,12 +5,120 @@ #pragma once #include <glad/glad.h> + #include "video_core/engines/maxwell_3d.h" +#include "video_core/surface.h" namespace OpenGL::MaxwellToGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; +struct FormatTuple { + GLenum internal_format; + GLenum format = GL_NONE; + GLenum type = GL_NONE; +}; + +constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TABLE = {{ + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM + {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM + {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT + {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM + {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM + {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM + {GL_RGB10_A2UI, 
GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT + {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM + {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM + {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM + {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT + {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT + {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT + {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM + {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM + {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT + {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT + {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT + {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT + {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM + {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM + {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM + {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM + {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM + {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM + {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT + {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT + {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM + {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT + {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT + {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT + {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT + {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT + {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT + {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM + {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM + {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT + {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT + {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM + {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT + {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT + {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT + {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM + {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM + {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM + {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT + {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT + {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT + {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT + {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT + {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT + {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM + {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM + {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB + {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB + {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB + 
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB + {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB + {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB + {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB + {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB + {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB + {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB + {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB + {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT + {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT + {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM + {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, + GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT +}}; + +inline const FormatTuple& GetFormatTuple(VideoCore::Surface::PixelFormat pixel_format) { + ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size()); + return FORMAT_TABLE[static_cast<size_t>(pixel_format)]; +} + inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) { switch (attrib.type) { case Maxwell::VertexAttribute::Type::UnsignedNorm: diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index c12929de6..285e78384 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -25,6 +25,7 @@ #include "video_core/host_shaders/opengl_present_vert.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/textures/decoders.h" @@ -139,6 +140,26 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, } AddTelemetryFields(); InitOpenGLObjects(); + + // Initialize default attributes to match hardware's disabled attributes + GLint max_attribs{}; + glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_attribs); + for (GLint attrib = 0; attrib < max_attribs; ++attrib) { + glVertexAttrib4f(attrib, 0.0f, 0.0f, 0.0f, 1.0f); + } + // Enable seamless cubemaps when per texture parameters are not available + if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { + glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); + } + // Enable unified vertex attributes and query vertex buffer address when the driver supports it + if (device.HasVertexBufferUnifiedMemory()) { + glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); + glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); + + glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); + glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, + &vertex_buffer_address); + } } RendererOpenGL::~RendererOpenGL() = default; @@ -230,18 +251,8 @@ void 
RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color void RendererOpenGL::InitOpenGLObjects() { // Create shader programs - OGLShader vertex_shader; - vertex_shader.Create(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); - - OGLShader fragment_shader; - fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); - - vertex_program.Create(true, false, vertex_shader.handle); - fragment_program.Create(true, false, fragment_shader.handle); - - pipeline.Create(); - glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle); - glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle); + present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); + present_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); // Generate presentation sampler present_sampler.Create(); @@ -263,21 +274,6 @@ void RendererOpenGL::InitOpenGLObjects() { // Clear screen to black LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); - - // Enable seamless cubemaps when per texture parameters are not available - if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { - glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); - } - - // Enable unified vertex attributes and query vertex buffer address when the driver supports it - if (device.HasVertexBufferUnifiedMemory()) { - glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); - glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); - - glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); - glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, - &vertex_buffer_address); - } } void RendererOpenGL::AddTelemetryFields() { @@ -342,8 +338,9 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { // Set projection matrix const std::array ortho_matrix = MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height)); - glProgramUniformMatrix3x2fv(vertex_program.handle, ModelViewMatrixLocation, 1, GL_FALSE, - std::data(ortho_matrix)); + program_manager.BindPresentPrograms(present_vertex.handle, present_fragment.handle); + glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE, + ortho_matrix.data()); const auto& texcoords = screen_info.display_texcoords; auto left = texcoords.left; @@ -404,8 +401,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { state_tracker.NotifyClipControl(); state_tracker.NotifyAlphaTest(); - program_manager.BindHostPipeline(pipeline.handle); - state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); glEnable(GL_CULL_FACE); if (screen_info.display_srgb) { @@ -453,7 +448,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { glClear(GL_COLOR_BUFFER_BIT); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - program_manager.RestoreGuestPipeline(); + // TODO + // program_manager.RestoreGuestPipeline(); } void RendererOpenGL::RenderScreenshot() { diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 0b66f8332..d455f572f 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -12,7 +12,6 @@ #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_shader_manager.h" 
#include "video_core/renderer_opengl/gl_state_tracker.h" namespace Core { @@ -111,9 +110,8 @@ private: // OpenGL object IDs OGLSampler present_sampler; OGLBuffer vertex_buffer; - OGLProgram vertex_program; - OGLProgram fragment_program; - OGLPipeline pipeline; + OGLProgram present_vertex; + OGLProgram present_fragment; OGLFramebuffer screenshot_framebuffer; // GPU address of the vertex buffer diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 8fb5be393..37a4d1d9d 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -16,8 +16,8 @@ #include "video_core/host_shaders/opengl_copy_bc4_comp.h" #include "video_core/host_shaders/opengl_copy_bgra_comp.h" #include "video_core/host_shaders/pitch_unswizzle_comp.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/util_shaders.h" #include "video_core/texture_cache/accelerated_swizzle.h" @@ -41,21 +41,14 @@ using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams; using VideoCore::Surface::BytesPerBlock; namespace { - OGLProgram MakeProgram(std::string_view source) { - OGLShader shader; - shader.Create(source, GL_COMPUTE_SHADER); - - OGLProgram program; - program.Create(true, false, shader.handle); - return program; + return CreateProgram(source, GL_COMPUTE_SHADER); } size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) { return static_cast<size_t>(copy.extent.width * copy.extent.height * copy.src_subresource.num_layers); } - } // Anonymous namespace UtilShaders::UtilShaders(ProgramManager& program_manager_) @@ -86,7 +79,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, .width = VideoCore::Surface::DefaultBlockWidth(image.info.format), .height = VideoCore::Surface::DefaultBlockHeight(image.info.format), }; - program_manager.BindHostCompute(astc_decoder_program.handle); + program_manager.BindComputeProgram(astc_decoder_program.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle); @@ -134,7 +127,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, static constexpr GLuint BINDING_INPUT_BUFFER = 1; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; - program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); + program_manager.BindComputeProgram(block_linear_unswizzle_2d_program.handle); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); @@ -173,7 +166,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); - program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); + program_manager.BindComputeProgram(block_linear_unswizzle_3d_program.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); @@ -222,7 +215,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, 
UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block), "Non-power of two images are not implemented"); - program_manager.BindHostCompute(pitch_unswizzle_program.handle); + program_manager.BindComputeProgram(pitch_unswizzle_program.handle); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glUniform2ui(LOC_ORIGIN, 0, 0); glUniform2i(LOC_DESTINATION, 0, 0); @@ -250,7 +243,7 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im static constexpr GLuint LOC_SRC_OFFSET = 0; static constexpr GLuint LOC_DST_OFFSET = 1; - program_manager.BindHostCompute(copy_bc4_program.handle); + program_manager.BindComputeProgram(copy_bc4_program.handle); for (const ImageCopy& copy : copies) { ASSERT(copy.src_subresource.base_layer == 0); @@ -286,7 +279,7 @@ void UtilShaders::CopyBGR(Image& dst_image, Image& src_image, break; case 4: { // BGRA8 copy - program_manager.BindHostCompute(copy_bgra_program.handle); + program_manager.BindComputeProgram(copy_bgra_program.handle); constexpr GLenum FORMAT = GL_RGBA8; for (const ImageCopy& copy : copies) { ASSERT(copy.src_offset == zero_offset); diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index b7f5b8bc2..6c1b2f063 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -49,6 +49,16 @@ constexpr VkDescriptorSetLayoutCreateInfo ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREA .bindingCount = 1, .pBindings = &TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>, }; +template <u32 num_textures> +inline constexpr DescriptorBankInfo TEXTURE_DESCRIPTOR_BANK_INFO{ + .uniform_buffers = 0, + .storage_buffers = 0, + .texture_buffers = 0, + .image_buffers = 0, + .textures = num_textures, + .images = 0, + .score = 2, +}; constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, @@ -323,18 +333,19 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi cmdbuf.SetScissor(0, scissor); cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); } - } // Anonymous namespace BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, - StateTracker& state_tracker_, VKDescriptorPool& descriptor_pool) + StateTracker& state_tracker_, DescriptorPool& descriptor_pool) : device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_}, one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout( ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), two_textures_set_layout(device.GetLogical().CreateDescriptorSetLayout( TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), - one_texture_descriptor_allocator(descriptor_pool, *one_texture_set_layout), - two_textures_descriptor_allocator(descriptor_pool, *two_textures_set_layout), + one_texture_descriptor_allocator{ + descriptor_pool.Allocator(*one_texture_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<1>)}, + two_textures_descriptor_allocator{ + descriptor_pool.Allocator(*two_textures_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<2>)}, one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout( PipelineLayoutCreateInfo(one_texture_set_layout.address()))), two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout( @@ -362,14 +373,14 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageV .operation = operation, }; const VkPipelineLayout layout = *one_texture_pipeline_layout; - 
const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D); + const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler; const VkPipeline pipeline = FindOrEmplacePipeline(key); - const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); scheduler.RequestRenderpass(dst_framebuffer); - scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_view, descriptor_set, - &device = device](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, dst_region, src_region, pipeline, layout, sampler, + src_view](vk::CommandBuffer cmdbuf) { // TODO: Barriers + const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, @@ -391,12 +402,11 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, const VkPipelineLayout layout = *two_textures_pipeline_layout; const VkSampler sampler = *nearest_sampler; const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass()); - const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit(); scheduler.RequestRenderpass(dst_framebuffer); scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view, - src_stencil_view, descriptor_set, - &device = device](vk::CommandBuffer cmdbuf) { + src_stencil_view, this](vk::CommandBuffer cmdbuf) { // TODO: Barriers + const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit(); UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view, src_stencil_view); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); @@ -416,7 +426,6 @@ void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer, void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view) { - ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass()); Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view); } @@ -436,16 +445,14 @@ void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer, void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& src_image_view) { const VkPipelineLayout layout = *one_texture_pipeline_layout; - const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D); + const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); const VkSampler sampler = *nearest_sampler; - const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); const VkExtent2D extent{ .width = src_image_view.size.width, .height = src_image_view.size.height, }; scheduler.RequestRenderpass(dst_framebuffer); - scheduler.Record([pipeline, layout, sampler, src_view, descriptor_set, extent, - &device = device](vk::CommandBuffer cmdbuf) { + scheduler.Record([pipeline, layout, sampler, src_view, extent, this](vk::CommandBuffer cmdbuf) { const VkOffset2D offset{ .x = 0, .y = 0, @@ -466,6 +473,7 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb .tex_scale = {viewport.width, viewport.height}, .tex_offset = {0.0f, 0.0f}, }; + const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); 
UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); // TODO: Barriers diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index 0d81a06ed..33ee095c1 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -31,7 +31,7 @@ struct BlitImagePipelineKey { class BlitImageHelper { public: explicit BlitImageHelper(const Device& device, VKScheduler& scheduler, - StateTracker& state_tracker, VKDescriptorPool& descriptor_pool); + StateTracker& state_tracker, DescriptorPool& descriptor_pool); ~BlitImageHelper(); void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 362278f01..d70153df3 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -15,9 +15,7 @@ #include "video_core/renderer_vulkan/vk_state_tracker.h" namespace Vulkan { - namespace { - constexpr size_t POINT = 0; constexpr size_t LINE = 1; constexpr size_t POLYGON = 2; @@ -39,10 +37,20 @@ constexpr std::array POLYGON_OFFSET_ENABLE_LUT = { POLYGON, // Patches }; +void RefreshXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs) { + std::ranges::transform(regs.tfb_layouts, state.layouts.begin(), [](const auto& layout) { + return VideoCommon::TransformFeedbackState::Layout{ + .stream = layout.stream, + .varying_count = layout.varying_count, + .stride = layout.stride, + }; + }); + state.varyings = regs.tfb_varying_locs; +} } // Anonymous namespace void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, - bool has_extended_dynamic_state) { + bool has_extended_dynamic_state, bool has_dynamic_vertex_input) { const Maxwell& regs = maxwell3d.regs; const std::array enabled_lut{ regs.polygon_offset_point_enable, @@ -52,6 +60,9 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); raw1 = 0; + extended_dynamic_state.Assign(has_extended_dynamic_state ? 1 : 0); + dynamic_vertex_input.Assign(has_dynamic_vertex_input ? 1 : 0); + xfb_enabled.Assign(regs.tfb_enabled != 0); primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0); depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0); depth_clamp_disabled.Assign(regs.view_volume_clip_control.depth_clamp_disabled.Value()); @@ -63,37 +74,66 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, tessellation_clockwise.Assign(regs.tess_mode.cw.Value()); logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0); logic_op.Assign(PackLogicOp(regs.logic_op.operation)); - rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); topology.Assign(regs.draw.topology); msaa_mode.Assign(regs.multisample_mode); raw2 = 0; + rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); const auto test_func = regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always; alpha_test_func.Assign(PackComparisonOp(test_func)); early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); - + depth_enabled.Assign(regs.zeta_enable != 0 ? 1 : 0); + depth_format.Assign(static_cast<u32>(regs.zeta.format)); + y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0); + provoking_vertex_last.Assign(regs.provoking_vertex_last != 0 ? 
1 : 0); + conservative_raster_enable.Assign(regs.conservative_raster_enable != 0 ? 1 : 0); + smooth_lines.Assign(regs.line_smooth_enable != 0 ? 1 : 0); + + for (size_t i = 0; i < regs.rt.size(); ++i) { + color_formats[i] = static_cast<u8>(regs.rt[i].format); + } alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref); point_size = Common::BitCast<u32>(regs.point_size); - if (maxwell3d.dirty.flags[Dirty::InstanceDivisors]) { - maxwell3d.dirty.flags[Dirty::InstanceDivisors] = false; - for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index); - binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0; - } - } - if (maxwell3d.dirty.flags[Dirty::VertexAttributes]) { - maxwell3d.dirty.flags[Dirty::VertexAttributes] = false; - for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { - const auto& input = regs.vertex_attrib_format[index]; - auto& attribute = attributes[index]; - attribute.raw = 0; - attribute.enabled.Assign(input.IsConstant() ? 0 : 1); - attribute.buffer.Assign(input.buffer); - attribute.offset.Assign(input.offset); - attribute.type.Assign(static_cast<u32>(input.type.Value())); - attribute.size.Assign(static_cast<u32>(input.size.Value())); + if (maxwell3d.dirty.flags[Dirty::VertexInput]) { + if (has_dynamic_vertex_input) { + // Dirty flag will be reset by the command buffer update + static constexpr std::array LUT{ + 0u, // Invalid + 1u, // SignedNorm + 1u, // UnsignedNorm + 2u, // SignedInt + 3u, // UnsignedInt + 1u, // UnsignedScaled + 1u, // SignedScaled + 1u, // Float + }; + const auto& attrs = regs.vertex_attrib_format; + attribute_types = 0; + for (size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { + const u32 mask = attrs[i].constant != 0 ? 0 : 3; + const u32 type = LUT[static_cast<size_t>(attrs[i].type.Value())]; + attribute_types |= static_cast<u64>(type & mask) << (i * 2); + } + } else { + maxwell3d.dirty.flags[Dirty::VertexInput] = false; + enabled_divisors = 0; + for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index); + binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0; + enabled_divisors |= (is_enabled ? u64{1} : 0) << index; + } + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + const auto& input = regs.vertex_attrib_format[index]; + auto& attribute = attributes[index]; + attribute.raw = 0; + attribute.enabled.Assign(input.constant ? 
0 : 1); + attribute.buffer.Assign(input.buffer); + attribute.offset.Assign(input.offset); + attribute.type.Assign(static_cast<u32>(input.type.Value())); + attribute.size.Assign(static_cast<u32>(input.size.Value())); + } } } if (maxwell3d.dirty.flags[Dirty::Blending]) { @@ -109,10 +149,12 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, return static_cast<u16>(viewport.swizzle.raw); }); } - if (!has_extended_dynamic_state) { - no_extended_dynamic_state.Assign(1); + if (!extended_dynamic_state) { dynamic_state.Refresh(regs); } + if (xfb_enabled) { + RefreshXfbState(xfb_state, regs); + } } void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t index) { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index a0eb83a68..c9be37935 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -12,6 +12,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/surface.h" +#include "video_core/transform_feedback.h" namespace Vulkan { @@ -60,7 +61,7 @@ struct FixedPipelineState { void Refresh(const Maxwell& regs, size_t index); - constexpr std::array<bool, 4> Mask() const noexcept { + std::array<bool, 4> Mask() const noexcept { return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0}; } @@ -97,11 +98,11 @@ struct FixedPipelineState { BitField<20, 3, u32> type; BitField<23, 6, u32> size; - constexpr Maxwell::VertexAttribute::Type Type() const noexcept { + Maxwell::VertexAttribute::Type Type() const noexcept { return static_cast<Maxwell::VertexAttribute::Type>(type.Value()); } - constexpr Maxwell::VertexAttribute::Size Size() const noexcept { + Maxwell::VertexAttribute::Size Size() const noexcept { return static_cast<Maxwell::VertexAttribute::Size>(size.Value()); } }; @@ -167,37 +168,53 @@ struct FixedPipelineState { union { u32 raw1; - BitField<0, 1, u32> no_extended_dynamic_state; - BitField<2, 1, u32> primitive_restart_enable; - BitField<3, 1, u32> depth_bias_enable; - BitField<4, 1, u32> depth_clamp_disabled; - BitField<5, 1, u32> ndc_minus_one_to_one; - BitField<6, 2, u32> polygon_mode; - BitField<8, 5, u32> patch_control_points_minus_one; - BitField<13, 2, u32> tessellation_primitive; - BitField<15, 2, u32> tessellation_spacing; - BitField<17, 1, u32> tessellation_clockwise; - BitField<18, 1, u32> logic_op_enable; - BitField<19, 4, u32> logic_op; - BitField<23, 1, u32> rasterize_enable; + BitField<0, 1, u32> extended_dynamic_state; + BitField<1, 1, u32> dynamic_vertex_input; + BitField<2, 1, u32> xfb_enabled; + BitField<3, 1, u32> primitive_restart_enable; + BitField<4, 1, u32> depth_bias_enable; + BitField<5, 1, u32> depth_clamp_disabled; + BitField<6, 1, u32> ndc_minus_one_to_one; + BitField<7, 2, u32> polygon_mode; + BitField<9, 5, u32> patch_control_points_minus_one; + BitField<14, 2, u32> tessellation_primitive; + BitField<16, 2, u32> tessellation_spacing; + BitField<18, 1, u32> tessellation_clockwise; + BitField<19, 1, u32> logic_op_enable; + BitField<20, 4, u32> logic_op; BitField<24, 4, Maxwell::PrimitiveTopology> topology; BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode; }; union { u32 raw2; - BitField<0, 3, u32> alpha_test_func; - BitField<3, 1, u32> early_z; + BitField<0, 1, u32> rasterize_enable; + BitField<1, 3, u32> alpha_test_func; + BitField<4, 1, u32> early_z; + BitField<5, 1, u32> depth_enabled; + BitField<6, 5, u32> depth_format; + BitField<11, 1, u32> y_negate; + 
BitField<12, 1, u32> provoking_vertex_last; + BitField<13, 1, u32> conservative_raster_enable; + BitField<14, 1, u32> smooth_lines; }; + std::array<u8, Maxwell::NumRenderTargets> color_formats; u32 alpha_test_ref; u32 point_size; - std::array<u32, Maxwell::NumVertexArrays> binding_divisors; - std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes; std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments; std::array<u16, Maxwell::NumViewports> viewport_swizzles; + union { + u64 attribute_types; // Used with VK_EXT_vertex_input_dynamic_state + u64 enabled_divisors; + }; + std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes; + std::array<u32, Maxwell::NumVertexArrays> binding_divisors; + DynamicState dynamic_state; + VideoCommon::TransformFeedbackState xfb_state; - void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state); + void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state, + bool has_dynamic_vertex_input); size_t Hash() const noexcept; @@ -208,8 +225,24 @@ struct FixedPipelineState { } size_t Size() const noexcept { - const size_t total_size = sizeof *this; - return total_size - (no_extended_dynamic_state != 0 ? 0 : sizeof(DynamicState)); + if (xfb_enabled) { + // When transform feedback is enabled, use the whole struct + return sizeof(*this); + } + if (dynamic_vertex_input) { + // Exclude dynamic state and attributes + return offsetof(FixedPipelineState, attributes); + } + if (extended_dynamic_state) { + // Exclude dynamic state + return offsetof(FixedPipelineState, dynamic_state); + } + // Default + return offsetof(FixedPipelineState, xfb_state); + } + + u32 DynamicAttributeType(size_t index) const noexcept { + return (attribute_types >> (index * 2)) & 0b11; } }; static_assert(std::has_unique_object_representations_v<FixedPipelineState>); diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index f088447e9..68a23b602 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -157,7 +157,7 @@ struct FormatTuple { {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32_FLOAT {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16_FLOAT {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16_UNORM - {VK_FORMAT_UNDEFINED}, // R16_SNORM + {VK_FORMAT_R16_SNORM, Attachable | Storage}, // R16_SNORM {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16_UINT {VK_FORMAT_UNDEFINED}, // R16_SINT {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM @@ -266,19 +266,20 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage}; } -VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) { +VkShaderStageFlagBits ShaderStage(Shader::Stage stage) { switch (stage) { - case Tegra::Engines::ShaderType::Vertex: + case Shader::Stage::VertexA: + case Shader::Stage::VertexB: return VK_SHADER_STAGE_VERTEX_BIT; - case Tegra::Engines::ShaderType::TesselationControl: + case Shader::Stage::TessellationControl: return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; - case Tegra::Engines::ShaderType::TesselationEval: + case Shader::Stage::TessellationEval: return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; - case Tegra::Engines::ShaderType::Geometry: + case Shader::Stage::Geometry: return VK_SHADER_STAGE_GEOMETRY_BIT; - case Tegra::Engines::ShaderType::Fragment: + case 
Shader::Stage::Fragment: return VK_SHADER_STAGE_FRAGMENT_BIT; - case Tegra::Engines::ShaderType::Compute: + case Shader::Stage::Compute: return VK_SHADER_STAGE_COMPUTE_BIT; } UNIMPLEMENTED_MSG("Unimplemented shader stage={}", stage); @@ -685,6 +686,19 @@ VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face) { return {}; } +VkPolygonMode PolygonMode(Maxwell::PolygonMode polygon_mode) { + switch (polygon_mode) { + case Maxwell::PolygonMode::Point: + return VK_POLYGON_MODE_POINT; + case Maxwell::PolygonMode::Line: + return VK_POLYGON_MODE_LINE; + case Maxwell::PolygonMode::Fill: + return VK_POLYGON_MODE_FILL; + } + UNIMPLEMENTED_MSG("Unimplemented polygon mode={}", polygon_mode); + return {}; +} + VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) { switch (swizzle) { case Tegra::Texture::SwizzleSource::Zero: @@ -741,4 +755,28 @@ VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reducti return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT; } +VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode) { + switch (msaa_mode) { + case Tegra::Texture::MsaaMode::Msaa1x1: + return VK_SAMPLE_COUNT_1_BIT; + case Tegra::Texture::MsaaMode::Msaa2x1: + case Tegra::Texture::MsaaMode::Msaa2x1_D3D: + return VK_SAMPLE_COUNT_2_BIT; + case Tegra::Texture::MsaaMode::Msaa2x2: + case Tegra::Texture::MsaaMode::Msaa2x2_VC4: + case Tegra::Texture::MsaaMode::Msaa2x2_VC12: + return VK_SAMPLE_COUNT_4_BIT; + case Tegra::Texture::MsaaMode::Msaa4x2: + case Tegra::Texture::MsaaMode::Msaa4x2_D3D: + case Tegra::Texture::MsaaMode::Msaa4x2_VC8: + case Tegra::Texture::MsaaMode::Msaa4x2_VC24: + return VK_SAMPLE_COUNT_8_BIT; + case Tegra::Texture::MsaaMode::Msaa4x4: + return VK_SAMPLE_COUNT_16_BIT; + default: + UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode)); + return VK_SAMPLE_COUNT_1_BIT; + } +} + } // namespace Vulkan::MaxwellToVK diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index e3e06ba38..8a9616039 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -5,6 +5,7 @@ #pragma once #include "common/common_types.h" +#include "shader_recompiler/stage.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/surface.h" #include "video_core/textures/texture.h" @@ -45,7 +46,7 @@ struct FormatInfo { [[nodiscard]] FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with_srgb, PixelFormat pixel_format); -VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage); +VkShaderStageFlagBits ShaderStage(Shader::Stage stage); VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology); @@ -65,10 +66,14 @@ VkFrontFace FrontFace(Maxwell::FrontFace front_face); VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face); +VkPolygonMode PolygonMode(Maxwell::PolygonMode polygon_mode); + VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle); VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle); VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction); +VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode); + } // namespace Vulkan::MaxwellToVK diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h new file mode 100644 index 000000000..4847db6b6 --- /dev/null +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -0,0 +1,154 @@ +// 
Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <cstddef> + +#include <boost/container/small_vector.hpp> + +#include "common/assert.h" +#include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/texture_cache/texture_cache.h" +#include "video_core/texture_cache/types.h" +#include "video_core/textures/texture.h" +#include "video_core/vulkan_common/vulkan_device.h" + +namespace Vulkan { + +class DescriptorLayoutBuilder { +public: + DescriptorLayoutBuilder(const Device& device_) : device{&device_} {} + + bool CanUsePushDescriptor() const noexcept { + return device->IsKhrPushDescriptorSupported() && + num_descriptors <= device->MaxPushDescriptors(); + } + + vk::DescriptorSetLayout CreateDescriptorSetLayout(bool use_push_descriptor) const { + if (bindings.empty()) { + return nullptr; + } + const VkDescriptorSetLayoutCreateFlags flags = + use_push_descriptor ? VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR : 0; + return device->GetLogical().CreateDescriptorSetLayout({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = flags, + .bindingCount = static_cast<u32>(bindings.size()), + .pBindings = bindings.data(), + }); + } + + vk::DescriptorUpdateTemplateKHR CreateTemplate(VkDescriptorSetLayout descriptor_set_layout, + VkPipelineLayout pipeline_layout, + bool use_push_descriptor) const { + if (entries.empty()) { + return nullptr; + } + const VkDescriptorUpdateTemplateType type = + use_push_descriptor ? VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR + : VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR; + return device->GetLogical().CreateDescriptorUpdateTemplateKHR({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, + .pNext = nullptr, + .flags = 0, + .descriptorUpdateEntryCount = static_cast<u32>(entries.size()), + .pDescriptorUpdateEntries = entries.data(), + .templateType = type, + .descriptorSetLayout = descriptor_set_layout, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .pipelineLayout = pipeline_layout, + .set = 0, + }); + } + + vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const { + return device->GetLogical().CreatePipelineLayout({ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = descriptor_set_layout ? 1U : 0U, + .pSetLayouts = bindings.empty() ? 
nullptr : &descriptor_set_layout, + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + }); + } + + void Add(const Shader::Info& info, VkShaderStageFlags stage) { + Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors); + Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors); + Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors); + Add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, stage, info.image_buffer_descriptors); + Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage, info.texture_descriptors); + Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage, info.image_descriptors); + } + +private: + template <typename Descriptors> + void Add(VkDescriptorType type, VkShaderStageFlags stage, const Descriptors& descriptors) { + const size_t num{descriptors.size()}; + for (size_t i = 0; i < num; ++i) { + bindings.push_back({ + .binding = binding, + .descriptorType = type, + .descriptorCount = descriptors[i].count, + .stageFlags = stage, + .pImmutableSamplers = nullptr, + }); + entries.push_back({ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = descriptors[i].count, + .descriptorType = type, + .offset = offset, + .stride = sizeof(DescriptorUpdateEntry), + }); + ++binding; + num_descriptors += descriptors[i].count; + offset += sizeof(DescriptorUpdateEntry); + } + } + + const Device* device{}; + boost::container::small_vector<VkDescriptorSetLayoutBinding, 32> bindings; + boost::container::small_vector<VkDescriptorUpdateTemplateEntryKHR, 32> entries; + u32 binding{}; + u32 num_descriptors{}; + size_t offset{}; +}; + +inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers, + const ImageId*& image_view_ids, TextureCache& texture_cache, + VKUpdateDescriptorQueue& update_descriptor_queue) { + for (const auto& desc : info.texture_buffer_descriptors) { + image_view_ids += desc.count; + } + for (const auto& desc : info.image_buffer_descriptors) { + image_view_ids += desc.count; + } + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const VkSampler sampler{*(samplers++)}; + ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; + const VkImageView vk_image_view{image_view.Handle(desc.type)}; + update_descriptor_queue.AddSampledImage(vk_image_view, sampler); + } + } + for (const auto& desc : info.image_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; + if (desc.is_written) { + texture_cache.MarkModification(image_view.image_id); + } + const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)}; + update_descriptor_queue.AddImage(vk_image_view); + } + } +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index bec3a81d9..a8d04dc61 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -130,35 +130,45 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { if (!framebuffer) { return; } - const auto& layout = render_window.GetFramebufferLayout(); - if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) { - const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; - const bool use_accelerated = - rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, 
framebuffer->stride); - const bool is_srgb = use_accelerated && screen_info.is_srgb; - if (swapchain.HasFramebufferChanged(layout) || swapchain.GetSrgbState() != is_srgb) { - swapchain.Create(layout.width, layout.height, is_srgb); - blit_screen.Recreate(); - } - - scheduler.WaitWorker(); - - while (!swapchain.AcquireNextImage()) { - swapchain.Create(layout.width, layout.height, is_srgb); - blit_screen.Recreate(); + SCOPE_EXIT({ render_window.OnFrameDisplayed(); }); + if (!render_window.IsShown()) { + return; + } + const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; + const bool use_accelerated = + rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); + const bool is_srgb = use_accelerated && screen_info.is_srgb; + + bool has_been_recreated = false; + const auto recreate_swapchain = [&] { + if (!has_been_recreated) { + has_been_recreated = true; + scheduler.WaitWorker(); } - const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated); - - scheduler.Flush(render_semaphore); - - if (swapchain.Present(render_semaphore)) { - blit_screen.Recreate(); + const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); + swapchain.Create(layout.width, layout.height, is_srgb); + }; + if (swapchain.IsSubOptimal() || swapchain.HasColorSpaceChanged(is_srgb)) { + recreate_swapchain(); + } + bool is_outdated; + do { + swapchain.AcquireNextImage(); + is_outdated = swapchain.IsOutDated(); + if (is_outdated) { + recreate_swapchain(); } - gpu.RendererFrameEndNotify(); - rasterizer.TickFrame(); + } while (is_outdated); + if (has_been_recreated) { + blit_screen.Recreate(); } + const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated); + scheduler.Flush(render_semaphore); + scheduler.WaitWorker(); + swapchain.Present(render_semaphore); - render_window.OnFrameDisplayed(); + gpu.RendererFrameEndNotify(); + rasterizer.TickFrame(); } void RendererVulkan::Report() const { diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 363134129..516f428e7 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -184,47 +184,43 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool .depth = 1, }, }; - scheduler.Record( - [buffer = *buffer, image = *raw_images[image_index], copy](vk::CommandBuffer cmdbuf) { - const VkImageMemoryBarrier base_barrier{ - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = 0, - .dstAccessMask = 0, - .oldLayout = VK_IMAGE_LAYOUT_GENERAL, - .newLayout = VK_IMAGE_LAYOUT_GENERAL, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = image, - .subresourceRange = - { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }; - VkImageMemoryBarrier read_barrier = base_barrier; - read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; - read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; - - VkImageMemoryBarrier write_barrier = base_barrier; - write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, read_barrier); - cmdbuf.CopyBufferToImage(buffer, image, 
VK_IMAGE_LAYOUT_GENERAL, copy); - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier); - }); + scheduler.Record([this, copy, image_index](vk::CommandBuffer cmdbuf) { + const VkImage image = *raw_images[image_index]; + const VkImageMemoryBarrier base_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = 0, + .dstAccessMask = 0, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; + VkImageMemoryBarrier read_barrier = base_barrier; + read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + + VkImageMemoryBarrier write_barrier = base_barrier; + write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, + read_barrier); + cmdbuf.CopyBufferToImage(*buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier); + }); } - scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index], - descriptor_set = descriptor_sets[image_index], buffer = *buffer, - size = swapchain.GetSize(), pipeline = *pipeline, - layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, image_index, size = swapchain.GetSize()](vk::CommandBuffer cmdbuf) { const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f; const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f; @@ -234,8 +230,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool const VkRenderPassBeginInfo renderpass_bi{ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, .pNext = nullptr, - .renderPass = renderpass, - .framebuffer = framebuffer, + .renderPass = *renderpass, + .framebuffer = *framebuffers[image_index], .renderArea = { .offset = {0, 0}, @@ -257,12 +253,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool .extent = size, }; cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); cmdbuf.SetViewport(0, viewport); cmdbuf.SetScissor(0, scissor); - cmdbuf.BindVertexBuffer(0, buffer, offsetof(BufferData, vertices)); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, {}); + cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices)); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, + descriptor_sets[image_index], {}); cmdbuf.Draw(4, 1, 0, 0); cmdbuf.EndRenderPass(); }); @@ -304,8 +301,7 @@ void VKBlitScreen::CreateShaders() { void VKBlitScreen::CreateSemaphores() { semaphores.resize(image_count); - std::generate(semaphores.begin(), semaphores.end(), - [this] { return device.GetLogical().CreateSemaphore(); }); + std::ranges::generate(semaphores, [this] { return 
device.GetLogical().CreateSemaphore(); }); } void VKBlitScreen::CreateDescriptorPool() { @@ -633,8 +629,8 @@ void VKBlitScreen::CreateFramebuffers() { } void VKBlitScreen::ReleaseRawImages() { - for (std::size_t i = 0; i < raw_images.size(); ++i) { - scheduler.Wait(resource_ticks.at(i)); + for (const u64 tick : resource_ticks) { + scheduler.Wait(tick); } raw_images.clear(); raw_buffer_commits.clear(); diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 0def1e769..f4b3ee95c 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -60,38 +60,74 @@ std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) { } return indices; } -} // Anonymous namespace - -Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) - : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {} -Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, - VAddr cpu_addr_, u64 size_bytes_) - : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) { - buffer = runtime.device.GetLogical().CreateBuffer(VkBufferCreateInfo{ +vk::Buffer CreateBuffer(const Device& device, u64 size) { + VkBufferUsageFlags flags = + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; + if (device.IsExtTransformFeedbackSupported()) { + flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; + } + return device.GetLogical().CreateBuffer({ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, - .size = SizeBytes(), - .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | - VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + .size = size, + .usage = flags, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, }); +} +} // Anonymous namespace + +Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) + : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {} + +Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, + VAddr cpu_addr_, u64 size_bytes_) + : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_), + device{&runtime.device}, buffer{CreateBuffer(*device, SizeBytes())}, + commit{runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal)} { if (runtime.device.HasDebuggingToolAttached()) { buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); } - commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); +} + +VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format) { + if (!device) { + // Null buffer, return a null descriptor + return VK_NULL_HANDLE; + } + const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) { + return offset == view.offset && size == view.size && format == view.format; + })}; + if (it != views.end()) { + return *it->handle; + } + 
views.push_back({ + .offset = offset, + .size = size, + .format = format, + .handle = device->GetLogical().CreateBufferView({ + .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .buffer = *buffer, + .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Buffer, false, format).format, + .offset = offset, + .range = size, + }), + }); + return *views.back().handle; } BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_, VKScheduler& scheduler_, StagingBufferPool& staging_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, - VKDescriptorPool& descriptor_pool) + DescriptorPool& descriptor_pool) : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_}, uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 3bb81d5b3..c27402ff0 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -9,13 +9,14 @@ #include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/surface.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { class Device; -class VKDescriptorPool; +class DescriptorPool; class VKScheduler; class BufferCacheRuntime; @@ -26,6 +27,8 @@ public: explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_); + [[nodiscard]] VkBufferView View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); + [[nodiscard]] VkBuffer Handle() const noexcept { return *buffer; } @@ -35,8 +38,17 @@ public: } private: + struct BufferView { + u32 offset; + u32 size; + VideoCore::Surface::PixelFormat format; + vk::BufferView handle; + }; + + const Device* device{}; vk::Buffer buffer; MemoryCommit commit; + std::vector<BufferView> views; }; class BufferCacheRuntime { @@ -49,7 +61,7 @@ public: explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_, VKScheduler& scheduler_, StagingBufferPool& staging_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, - VKDescriptorPool& descriptor_pool); + DescriptorPool& descriptor_pool); void Finish(); @@ -87,6 +99,11 @@ public: BindBuffer(buffer, offset, size); } + void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, + VideoCore::Surface::PixelFormat format) { + update_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format)); + } + private: void BindBuffer(VkBuffer buffer, u32 offset, u32 size) { update_descriptor_queue.AddBuffer(buffer, offset, size); @@ -124,6 +141,7 @@ struct BufferCacheParams { static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false; static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; static constexpr bool USE_MEMORY_MAPS = true; + static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false; }; using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 4181d83ee..8e426ce2c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ 
b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -41,80 +41,92 @@ constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 2; constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 3; constexpr size_t ASTC_NUM_BINDINGS = 4; -VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { - return { - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .offset = 0, - .size = static_cast<u32>(size), - }; -} - -std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() { - return {{ - { - .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - { - .binding = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - }}; -} +template <size_t size> +inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{ + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .offset = 0, + .size = static_cast<u32>(size), +}; -std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> BuildASTCDescriptorSetBindings() { - return {{ - { - .binding = ASTC_BINDING_INPUT_BUFFER, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - { - .binding = ASTC_BINDING_ENC_BUFFER, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - { - .binding = ASTC_BINDING_SWIZZLE_BUFFER, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - { - .binding = ASTC_BINDING_OUTPUT_IMAGE, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - }}; -} +constexpr std::array<VkDescriptorSetLayoutBinding, 2> INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS{{ + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, + { + .binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, +}}; + +constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{ + .uniform_buffers = 0, + .storage_buffers = 2, + .texture_buffers = 0, + .image_buffers = 0, + .textures = 0, + .images = 0, + .score = 2, +}; -VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { - return { - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 2, +constexpr std::array<VkDescriptorSetLayoutBinding, 4> ASTC_DESCRIPTOR_SET_BINDINGS{{ + { + .binding = ASTC_BINDING_INPUT_BUFFER, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .offset = 0, - .stride = sizeof(DescriptorUpdateEntry), - }; -} + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, + { + .binding = ASTC_BINDING_ENC_BUFFER, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, + { + .binding = ASTC_BINDING_SWIZZLE_BUFFER, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + 
.pImmutableSamplers = nullptr, + }, + { + .binding = ASTC_BINDING_OUTPUT_IMAGE, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, +}}; + +constexpr DescriptorBankInfo ASTC_BANK_INFO{ + .uniform_buffers = 0, + .storage_buffers = 3, + .texture_buffers = 0, + .image_buffers = 0, + .textures = 0, + .images = 1, + .score = 4, +}; -std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS> -BuildASTCPassDescriptorUpdateTemplateEntry() { - return {{ +constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{ + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 2, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .offset = 0, + .stride = sizeof(DescriptorUpdateEntry), +}; + +constexpr std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS> + ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY{{ { .dstBinding = ASTC_BINDING_INPUT_BUFFER, .dstArrayElement = 0, @@ -148,7 +160,6 @@ BuildASTCPassDescriptorUpdateTemplateEntry() { .stride = sizeof(DescriptorUpdateEntry), }, }}; -} struct AstcPushConstants { std::array<u32, 2> blocks_dims; @@ -159,14 +170,14 @@ struct AstcPushConstants { u32 block_height; u32 block_height_mask; }; - } // Anonymous namespace -VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, - vk::Span<VkDescriptorSetLayoutBinding> bindings, - vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, - vk::Span<VkPushConstantRange> push_constants, - std::span<const u32> code) { +ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool, + vk::Span<VkDescriptorSetLayoutBinding> bindings, + vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, + const DescriptorBankInfo& bank_info, + vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code) + : device{device_} { descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, @@ -196,8 +207,7 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ .pipelineLayout = *layout, .set = 0, }); - - descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout); + descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, bank_info); } module = device.GetLogical().CreateShaderModule({ .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, @@ -206,43 +216,34 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ .codeSize = static_cast<u32>(code.size_bytes()), .pCode = code.data(), }); + device.SaveShader(code); pipeline = device.GetLogical().CreateComputePipeline({ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .stage = - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = *module, - .pName = "main", - .pSpecializationInfo = nullptr, - }, + .stage{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, .layout = *layout, .basePipelineHandle = nullptr, .basePipelineIndex = 0, }); } -VKComputePass::~VKComputePass() = default; +ComputePass::~ComputePass() = default; -VkDescriptorSet VKComputePass::CommitDescriptorSet( - VKUpdateDescriptorQueue& 
update_descriptor_queue) { - if (!descriptor_template) { - return nullptr; - } - const VkDescriptorSet set = descriptor_allocator->Commit(); - update_descriptor_queue.Send(*descriptor_template, set); - return set; -} - -Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, +Uint8Pass::Uint8Pass(const Device& device_, VKScheduler& scheduler_, + DescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) - : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), - BuildInputOutputDescriptorUpdateTemplate(), {}, VULKAN_UINT8_COMP_SPV), + : ComputePass(device_, descriptor_pool, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, + INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, {}, + VULKAN_UINT8_COMP_SPV), scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, update_descriptor_queue{update_descriptor_queue_} {} @@ -256,11 +257,11 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer update_descriptor_queue.Acquire(); update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); - const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); + const void* const descriptor_data{update_descriptor_queue.UpdateData()}; + const VkBuffer buffer{staging.buffer}; scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, - num_vertices](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, buffer, descriptor_data, num_vertices](vk::CommandBuffer cmdbuf) { static constexpr u32 DISPATCH_SIZE = 1024; static constexpr VkMemoryBarrier WRITE_BARRIER{ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, @@ -268,8 +269,10 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, }; - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); + const VkDescriptorSet set = descriptor_allocator.Commit(); + device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1); cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER); @@ -278,12 +281,12 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer } QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, + DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) - : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(), - BuildInputOutputDescriptorUpdateTemplate(), - BuildComputePushConstantRange(sizeof(u32) * 2), VULKAN_QUAD_INDEXED_COMP_SPV), + : ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, + INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, + COMPUTE_PUSH_CONSTANT_RANGE<sizeof(u32) * 2>, VULKAN_QUAD_INDEXED_COMP_SPV), 
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, update_descriptor_queue{update_descriptor_queue_} {} @@ -313,11 +316,11 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( update_descriptor_queue.Acquire(); update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); - const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); + const void* const descriptor_data{update_descriptor_queue.UpdateData()}; scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, - num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, buffer = staging.buffer, descriptor_data, num_tri_vertices, base_vertex, + index_shift](vk::CommandBuffer cmdbuf) { static constexpr u32 DISPATCH_SIZE = 1024; static constexpr VkMemoryBarrier WRITE_BARRIER{ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, @@ -325,10 +328,12 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, }; - const std::array push_constants = {base_vertex, index_shift}; - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); - cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), + const std::array push_constants{base_vertex, index_shift}; + const VkDescriptorSet set = descriptor_allocator.Commit(); + device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); + cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), &push_constants); cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1); cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, @@ -338,15 +343,14 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( } ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, + DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, MemoryAllocator& memory_allocator_) - : VKComputePass(device_, descriptor_pool_, BuildASTCDescriptorSetBindings(), - BuildASTCPassDescriptorUpdateTemplateEntry(), - BuildComputePushConstantRange(sizeof(AstcPushConstants)), - ASTC_DECODER_COMP_SPV), - device{device_}, scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, + : ComputePass(device_, descriptor_pool_, ASTC_DESCRIPTOR_SET_BINDINGS, + ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY, ASTC_BANK_INFO, + COMPUTE_PUSH_CONSTANT_RANGE<sizeof(AstcPushConstants)>, ASTC_DECODER_COMP_SPV), + scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {} ASTCDecoderPass::~ASTCDecoderPass() = default; @@ -444,16 +448,14 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES), sizeof(SWIZZLE_TABLE)); update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); - - const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); - 
const VkPipelineLayout vk_layout = *layout; + const void* const descriptor_data{update_descriptor_queue.UpdateData()}; // To unswizzle the ASTC data const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0})); ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0})); - scheduler.Record([vk_layout, num_dispatches_x, num_dispatches_y, num_dispatches_z, - block_dims, params, set](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims, + params, descriptor_data](vk::CommandBuffer cmdbuf) { const AstcPushConstants uniforms{ .blocks_dims = block_dims, .bytes_per_block_log2 = params.bytes_per_block_log2, @@ -463,8 +465,10 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, .block_height = params.block_height, .block_height_mask = params.block_height_mask, }; - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, vk_layout, 0, set, {}); - cmdbuf.PushConstants(vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms); + const VkDescriptorSet set = descriptor_allocator.Commit(); + device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); + cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms); cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z); }); } diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 5ea187c30..114aef2bd 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -4,7 +4,6 @@ #pragma once -#include <optional> #include <span> #include <utility> @@ -27,31 +26,31 @@ class VKUpdateDescriptorQueue; class Image; struct StagingBufferRef; -class VKComputePass { +class ComputePass { public: - explicit VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, - vk::Span<VkDescriptorSetLayoutBinding> bindings, - vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, - vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code); - ~VKComputePass(); + explicit ComputePass(const Device& device, DescriptorPool& descriptor_pool, + vk::Span<VkDescriptorSetLayoutBinding> bindings, + vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, + const DescriptorBankInfo& bank_info, + vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code); + ~ComputePass(); protected: - VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue); - + const Device& device; vk::DescriptorUpdateTemplateKHR descriptor_template; vk::PipelineLayout layout; vk::Pipeline pipeline; + vk::DescriptorSetLayout descriptor_set_layout; + DescriptorAllocator descriptor_allocator; private: - vk::DescriptorSetLayout descriptor_set_layout; - std::optional<DescriptorAllocator> descriptor_allocator; vk::ShaderModule module; }; -class Uint8Pass final : public VKComputePass { +class Uint8Pass final : public ComputePass { public: explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, + DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_); ~Uint8Pass(); @@ -66,10 +65,10 @@ private: VKUpdateDescriptorQueue& update_descriptor_queue; }; -class QuadIndexedPass final : public VKComputePass { +class 
QuadIndexedPass final : public ComputePass { public: explicit QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, + DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_); ~QuadIndexedPass(); @@ -84,10 +83,10 @@ private: VKUpdateDescriptorQueue& update_descriptor_queue; }; -class ASTCDecoderPass final : public VKComputePass { +class ASTCDecoderPass final : public ComputePass { public: explicit ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, + DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, MemoryAllocator& memory_allocator_); @@ -99,7 +98,6 @@ public: private: void MakeDataBuffer(); - const Device& device; VKScheduler& scheduler; StagingBufferPool& staging_buffer_pool; VKUpdateDescriptorQueue& update_descriptor_queue; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 3a48219b7..70b84c7a6 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -2,152 +2,198 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <algorithm> #include <vector> +#include <boost/container/small_vector.hpp> + +#include "video_core/renderer_vulkan/pipeline_helper.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/shader_notify.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { -VKComputePipeline::VKComputePipeline(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_, - const SPIRVShader& shader_) - : device{device_}, scheduler{scheduler_}, entries{shader_.entries}, - descriptor_set_layout{CreateDescriptorSetLayout()}, - descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, - update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, - descriptor_template{CreateDescriptorUpdateTemplate()}, - shader_module{CreateShaderModule(shader_.code)}, pipeline{CreatePipeline()} {} - -VKComputePipeline::~VKComputePipeline() = default; - -VkDescriptorSet VKComputePipeline::CommitDescriptorSet() { - if (!descriptor_template) { - return {}; - } - const VkDescriptorSet set = descriptor_allocator.Commit(); - update_descriptor_queue.Send(*descriptor_template, set); - return set; -} - -vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { - std::vector<VkDescriptorSetLayoutBinding> bindings; - u32 binding = 0; - const auto add_bindings = [&](VkDescriptorType descriptor_type, std::size_t num_entries) { - // TODO(Rodrigo): Maybe make individual bindings here? 
- for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) { - bindings.push_back({ - .binding = binding++, - .descriptorType = descriptor_type, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }); - } - }; - add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size()); - add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size()); - add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.uniform_texels.size()); - add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size()); - add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size()); - add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size()); - - return device.GetLogical().CreateDescriptorSetLayout({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .bindingCount = static_cast<u32>(bindings.size()), - .pBindings = bindings.data(), - }); -} - -vk::PipelineLayout VKComputePipeline::CreatePipelineLayout() const { - return device.GetLogical().CreatePipelineLayout({ - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .setLayoutCount = 1, - .pSetLayouts = descriptor_set_layout.address(), - .pushConstantRangeCount = 0, - .pPushConstantRanges = nullptr, - }); -} - -vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplate() const { - std::vector<VkDescriptorUpdateTemplateEntryKHR> template_entries; - u32 binding = 0; - u32 offset = 0; - FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries); - if (template_entries.empty()) { - // If the shader doesn't use descriptor sets, skip template creation. 
- return {}; +using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; + +ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue_, + Common::ThreadWorker* thread_worker, + VideoCore::ShaderNotify* shader_notify, const Shader::Info& info_, + vk::ShaderModule spv_module_) + : device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_}, + spv_module(std::move(spv_module_)) { + if (shader_notify) { + shader_notify->MarkShaderBuilding(); } - - return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, - .pNext = nullptr, - .flags = 0, - .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()), - .pDescriptorUpdateEntries = template_entries.data(), - .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, - .descriptorSetLayout = *descriptor_set_layout, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .pipelineLayout = *layout, - .set = DESCRIPTOR_SET, - }); -} - -vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const { - device.SaveShader(code); - - return device.GetLogical().CreateShaderModule({ - .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .codeSize = code.size() * sizeof(u32), - .pCode = code.data(), - }); -} - -vk::Pipeline VKComputePipeline::CreatePipeline() const { - - VkComputePipelineCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = - { + std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), + uniform_buffer_sizes.begin()); + + auto func{[this, &descriptor_pool, shader_notify] { + DescriptorLayoutBuilder builder{device}; + builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); + + descriptor_set_layout = builder.CreateDescriptorSetLayout(false); + pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout); + descriptor_update_template = + builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout, false); + descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info); + const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, + .pNext = nullptr, + .requiredSubgroupSize = GuestWarpSize, + }; + pipeline = device.GetLogical().CreateComputePipeline({ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, + .pNext = device.IsExtSubgroupSizeControlSupported() ? 
&subgroup_size_ci : nullptr, .flags = 0, .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = *shader_module, + .module = *spv_module, .pName = "main", .pSpecializationInfo = nullptr, }, - .layout = *layout, - .basePipelineHandle = nullptr, - .basePipelineIndex = 0, - }; - - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, - .pNext = nullptr, - .requiredSubgroupSize = GuestWarpSize, - }; - - if (entries.uses_warps && device.IsGuestWarpSizeSupported(VK_SHADER_STAGE_COMPUTE_BIT)) { - ci.stage.pNext = &subgroup_size_ci; + .layout = *pipeline_layout, + .basePipelineHandle = 0, + .basePipelineIndex = 0, + }); + std::lock_guard lock{build_mutex}; + is_built = true; + build_condvar.notify_one(); + if (shader_notify) { + shader_notify->MarkShaderComplete(); + } + }}; + if (thread_worker) { + thread_worker->QueueWork(std::move(func)); + } else { + func(); + } +} + +void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, + Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler, + BufferCache& buffer_cache, TextureCache& texture_cache) { + update_descriptor_queue.Acquire(); + + buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes); + buffer_cache.UnbindComputeStorageBuffers(); + size_t ssbo_index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, + desc.is_written); + ++ssbo_index; } - return device.GetLogical().CreateComputePipeline(ci); + texture_cache.SynchronizeComputeDescriptors(); + + static constexpr size_t max_elements = 64; + std::array<ImageId, max_elements> image_view_ids; + boost::container::static_vector<u32, max_elements> image_view_indices; + boost::container::static_vector<VkSampler, max_elements> samplers; + + const auto& qmd{kepler_compute.launch_description}; + const auto& cbufs{qmd.const_buffer_config}; + const bool via_header_index{qmd.linked_tsc != 0}; + const auto read_handle{[&](const auto& desc, u32 index) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset}; + if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> || + std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) { + if (desc.has_secondary) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0); + const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset}; + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() + + secondary_offset}; + const u32 lhs_raw{gpu_memory.Read<u32>(addr)}; + const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)}; + return TexturePair(lhs_raw | rhs_raw, via_header_index); + } + } + return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); + }}; + const auto add_image{[&](const auto& desc) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + } + }}; + std::ranges::for_each(info.texture_buffer_descriptors, add_image); + std::ranges::for_each(info.image_buffer_descriptors, add_image); + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto 
handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + + Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); + samplers.push_back(sampler->Handle()); + } + } + std::ranges::for_each(info.image_descriptors, add_image); + + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillComputeImageViews(indices_span, image_view_ids); + + buffer_cache.UnbindComputeTextureBuffers(); + ImageId* texture_buffer_ids{image_view_ids.data()}; + size_t index{}; + const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>; + for (u32 i = 0; i < desc.count; ++i) { + bool is_written{false}; + if constexpr (is_image) { + is_written = desc.is_written; + } + ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); + buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written, is_image); + ++texture_buffer_ids; + ++index; + } + }}; + std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); + std::ranges::for_each(info.image_buffer_descriptors, add_buffer); + + buffer_cache.UpdateComputeBuffers(); + buffer_cache.BindHostComputeBuffers(); + + const VkSampler* samplers_it{samplers.data()}; + const ImageId* views_it{image_view_ids.data()}; + PushImageDescriptors(info, samplers_it, views_it, texture_cache, update_descriptor_queue); + + if (!is_built.load(std::memory_order::relaxed)) { + // Wait for the pipeline to be built + scheduler.Record([this](vk::CommandBuffer) { + std::unique_lock lock{build_mutex}; + build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); + }); + } + const void* const descriptor_data{update_descriptor_queue.UpdateData()}; + scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); + if (!descriptor_set_layout) { + return; + } + const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; + const vk::Device& dev{device.GetLogical()}; + dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, + descriptor_set, nullptr); + }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 7e16575ac..52fec04d3 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -4,61 +4,63 @@ #pragma once +#include <atomic> +#include <condition_variable> +#include <mutex> + #include "common/common_types.h" +#include "common/thread_worker.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/memory_manager.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +namespace VideoCore { +class ShaderNotify; +} + namespace Vulkan { class Device; class VKScheduler; -class VKUpdateDescriptorQueue; -class VKComputePipeline final { +class ComputePipeline { public: - explicit VKComputePipeline(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& 
descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_, - const SPIRVShader& shader_); - ~VKComputePipeline(); - - VkDescriptorSet CommitDescriptorSet(); + explicit ComputePipeline(const Device& device, DescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue, + Common::ThreadWorker* thread_worker, + VideoCore::ShaderNotify* shader_notify, const Shader::Info& info, + vk::ShaderModule spv_module); - VkPipeline GetHandle() const { - return *pipeline; - } + ComputePipeline& operator=(ComputePipeline&&) noexcept = delete; + ComputePipeline(ComputePipeline&&) noexcept = delete; - VkPipelineLayout GetLayout() const { - return *layout; - } + ComputePipeline& operator=(const ComputePipeline&) = delete; + ComputePipeline(const ComputePipeline&) = delete; - const ShaderEntries& GetEntries() const { - return entries; - } + void Configure(Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory, + VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache); private: - vk::DescriptorSetLayout CreateDescriptorSetLayout() const; - - vk::PipelineLayout CreatePipelineLayout() const; - - vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate() const; - - vk::ShaderModule CreateShaderModule(const std::vector<u32>& code) const; - - vk::Pipeline CreatePipeline() const; - const Device& device; - VKScheduler& scheduler; - ShaderEntries entries; + VKUpdateDescriptorQueue& update_descriptor_queue; + Shader::Info info; + VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{}; + + vk::ShaderModule spv_module; vk::DescriptorSetLayout descriptor_set_layout; DescriptorAllocator descriptor_allocator; - VKUpdateDescriptorQueue& update_descriptor_queue; - vk::PipelineLayout layout; - vk::DescriptorUpdateTemplateKHR descriptor_template; - vk::ShaderModule shader_module; + vk::PipelineLayout pipeline_layout; + vk::DescriptorUpdateTemplateKHR descriptor_update_template; vk::Pipeline pipeline; + + std::condition_variable build_condvar; + std::mutex build_mutex; + std::atomic_bool is_built{false}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index ef9fb5910..8e77e4796 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <mutex> +#include <span> #include <vector> #include "common/common_types.h" @@ -13,79 +15,149 @@ namespace Vulkan { -// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines. 
-constexpr std::size_t SETS_GROW_RATE = 0x20; +// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines +constexpr size_t SETS_GROW_RATE = 16; +constexpr s32 SCORE_THRESHOLD = 3; +constexpr u32 SETS_PER_POOL = 64; -DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool_, - VkDescriptorSetLayout layout_) - : ResourcePool(descriptor_pool_.master_semaphore, SETS_GROW_RATE), - descriptor_pool{descriptor_pool_}, layout{layout_} {} +struct DescriptorBank { + DescriptorBankInfo info; + std::vector<vk::DescriptorPool> pools; +}; -DescriptorAllocator::~DescriptorAllocator() = default; +bool DescriptorBankInfo::IsSuperset(const DescriptorBankInfo& subset) const noexcept { + return uniform_buffers >= subset.uniform_buffers && storage_buffers >= subset.storage_buffers && + texture_buffers >= subset.texture_buffers && image_buffers >= subset.image_buffers && + textures >= subset.textures && images >= subset.image_buffers; +} -VkDescriptorSet DescriptorAllocator::Commit() { - const std::size_t index = CommitResource(); - return descriptors_allocations[index / SETS_GROW_RATE][index % SETS_GROW_RATE]; +template <typename Descriptors> +static u32 Accumulate(const Descriptors& descriptors) { + u32 count = 0; + for (const auto& descriptor : descriptors) { + count += descriptor.count; + } + return count; } -void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) { - descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin)); +static DescriptorBankInfo MakeBankInfo(std::span<const Shader::Info> infos) { + DescriptorBankInfo bank; + for (const Shader::Info& info : infos) { + bank.uniform_buffers += Accumulate(info.constant_buffer_descriptors); + bank.storage_buffers += Accumulate(info.storage_buffers_descriptors); + bank.texture_buffers += Accumulate(info.texture_buffer_descriptors); + bank.image_buffers += Accumulate(info.image_buffer_descriptors); + bank.textures += Accumulate(info.texture_descriptors); + bank.images += Accumulate(info.image_descriptors); + } + bank.score = bank.uniform_buffers + bank.storage_buffers + bank.texture_buffers + + bank.image_buffers + bank.textures + bank.images; + return bank; } -VKDescriptorPool::VKDescriptorPool(const Device& device_, VKScheduler& scheduler) - : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()}, active_pool{ - AllocateNewPool()} {} - -VKDescriptorPool::~VKDescriptorPool() = default; - -vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() { - static constexpr u32 num_sets = 0x20000; - static constexpr VkDescriptorPoolSize pool_sizes[] = { - {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, num_sets * 90}, - {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60}, - {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64}, - {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64}, - {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64}, - {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}, +static void AllocatePool(const Device& device, DescriptorBank& bank) { + std::array<VkDescriptorPoolSize, 6> pool_sizes; + size_t pool_cursor{}; + const auto add = [&](VkDescriptorType type, u32 count) { + if (count > 0) { + pool_sizes[pool_cursor++] = { + .type = type, + .descriptorCount = count * SETS_PER_POOL, + }; + } }; - - const VkDescriptorPoolCreateInfo ci{ + const auto& info{bank.info}; + add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, info.uniform_buffers); + add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, info.storage_buffers); + add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 
info.texture_buffers); + add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, info.image_buffers); + add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, info.textures); + add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, info.images); + bank.pools.push_back(device.GetLogical().CreateDescriptorPool({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, .pNext = nullptr, .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, - .maxSets = num_sets, - .poolSizeCount = static_cast<u32>(std::size(pool_sizes)), + .maxSets = SETS_PER_POOL, + .poolSizeCount = static_cast<u32>(pool_cursor), .pPoolSizes = std::data(pool_sizes), - }; - return &pools.emplace_back(device.GetLogical().CreateDescriptorPool(ci)); + })); +} + +DescriptorAllocator::DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_, + DescriptorBank& bank_, VkDescriptorSetLayout layout_) + : ResourcePool(master_semaphore_, SETS_GROW_RATE), device{&device_}, bank{&bank_}, + layout{layout_} {} + +VkDescriptorSet DescriptorAllocator::Commit() { + const size_t index = CommitResource(); + return sets[index / SETS_GROW_RATE][index % SETS_GROW_RATE]; } -vk::DescriptorSets VKDescriptorPool::AllocateDescriptors(VkDescriptorSetLayout layout, - std::size_t count) { - const std::vector layout_copies(count, layout); - VkDescriptorSetAllocateInfo ai{ +void DescriptorAllocator::Allocate(size_t begin, size_t end) { + sets.push_back(AllocateDescriptors(end - begin)); +} + +vk::DescriptorSets DescriptorAllocator::AllocateDescriptors(size_t count) { + const std::vector<VkDescriptorSetLayout> layouts(count, layout); + VkDescriptorSetAllocateInfo allocate_info{ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, .pNext = nullptr, - .descriptorPool = **active_pool, + .descriptorPool = *bank->pools.back(), .descriptorSetCount = static_cast<u32>(count), - .pSetLayouts = layout_copies.data(), + .pSetLayouts = layouts.data(), }; - - vk::DescriptorSets sets = active_pool->Allocate(ai); - if (!sets.IsOutOfPoolMemory()) { - return sets; + vk::DescriptorSets new_sets = bank->pools.back().Allocate(allocate_info); + if (!new_sets.IsOutOfPoolMemory()) { + return new_sets; } - // Our current pool is out of memory. Allocate a new one and retry - active_pool = AllocateNewPool(); - ai.descriptorPool = **active_pool; - sets = active_pool->Allocate(ai); - if (!sets.IsOutOfPoolMemory()) { - return sets; + AllocatePool(*device, *bank); + allocate_info.descriptorPool = *bank->pools.back(); + new_sets = bank->pools.back().Allocate(allocate_info); + if (!new_sets.IsOutOfPoolMemory()) { + return new_sets; } - // After allocating a new pool, we are out of memory again. We can't handle this from here. 
throw vk::Exception(VK_ERROR_OUT_OF_POOL_MEMORY); } +DescriptorPool::DescriptorPool(const Device& device_, VKScheduler& scheduler) + : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()} {} + +DescriptorPool::~DescriptorPool() = default; + +DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout, + std::span<const Shader::Info> infos) { + return Allocator(layout, MakeBankInfo(infos)); +} + +DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout, + const Shader::Info& info) { + return Allocator(layout, MakeBankInfo(std::array{info})); +} + +DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout, + const DescriptorBankInfo& info) { + return DescriptorAllocator(device, master_semaphore, Bank(info), layout); +} + +DescriptorBank& DescriptorPool::Bank(const DescriptorBankInfo& reqs) { + std::shared_lock read_lock{banks_mutex}; + const auto it = std::ranges::find_if(bank_infos, [&reqs](const DescriptorBankInfo& bank) { + return std::abs(bank.score - reqs.score) < SCORE_THRESHOLD && bank.IsSuperset(reqs); + }); + if (it != bank_infos.end()) { + return *banks[std::distance(bank_infos.begin(), it)].get(); + } + read_lock.unlock(); + + std::unique_lock write_lock{banks_mutex}; + bank_infos.push_back(reqs); + + auto& bank = *banks.emplace_back(std::make_unique<DescriptorBank>()); + bank.info = reqs; + AllocatePool(device, bank); + return bank; +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h index f892be7be..59466aac5 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h @@ -4,57 +4,85 @@ #pragma once +#include <shared_mutex> +#include <span> #include <vector> +#include "shader_recompiler/shader_info.h" #include "video_core/renderer_vulkan/vk_resource_pool.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { class Device; -class VKDescriptorPool; class VKScheduler; +struct DescriptorBank; + +struct DescriptorBankInfo { + [[nodiscard]] bool IsSuperset(const DescriptorBankInfo& subset) const noexcept; + + u32 uniform_buffers{}; ///< Number of uniform buffer descriptors + u32 storage_buffers{}; ///< Number of storage buffer descriptors + u32 texture_buffers{}; ///< Number of texture buffer descriptors + u32 image_buffers{}; ///< Number of image buffer descriptors + u32 textures{}; ///< Number of texture descriptors + u32 images{}; ///< Number of image descriptors + s32 score{}; ///< Number of descriptors in total +}; + class DescriptorAllocator final : public ResourcePool { + friend class DescriptorPool; + public: - explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout); - ~DescriptorAllocator() override; + explicit DescriptorAllocator() = default; + ~DescriptorAllocator() override = default; + + DescriptorAllocator& operator=(DescriptorAllocator&&) noexcept = default; + DescriptorAllocator(DescriptorAllocator&&) noexcept = default; DescriptorAllocator& operator=(const DescriptorAllocator&) = delete; DescriptorAllocator(const DescriptorAllocator&) = delete; VkDescriptorSet Commit(); -protected: - void Allocate(std::size_t begin, std::size_t end) override; - private: - VKDescriptorPool& descriptor_pool; - const VkDescriptorSetLayout layout; + explicit DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_, + DescriptorBank& bank_, VkDescriptorSetLayout layout_); - 
std::vector<vk::DescriptorSets> descriptors_allocations; -}; + void Allocate(size_t begin, size_t end) override; + + vk::DescriptorSets AllocateDescriptors(size_t count); + + const Device* device{}; + DescriptorBank* bank{}; + VkDescriptorSetLayout layout{}; -class VKDescriptorPool final { - friend DescriptorAllocator; + std::vector<vk::DescriptorSets> sets; +}; +class DescriptorPool { public: - explicit VKDescriptorPool(const Device& device, VKScheduler& scheduler); - ~VKDescriptorPool(); + explicit DescriptorPool(const Device& device, VKScheduler& scheduler); + ~DescriptorPool(); - VKDescriptorPool(const VKDescriptorPool&) = delete; - VKDescriptorPool& operator=(const VKDescriptorPool&) = delete; + DescriptorPool& operator=(const DescriptorPool&) = delete; + DescriptorPool(const DescriptorPool&) = delete; -private: - vk::DescriptorPool* AllocateNewPool(); + DescriptorAllocator Allocator(VkDescriptorSetLayout layout, + std::span<const Shader::Info> infos); + DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const Shader::Info& info); + DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const DescriptorBankInfo& info); - vk::DescriptorSets AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count); +private: + DescriptorBank& Bank(const DescriptorBankInfo& reqs); const Device& device; MasterSemaphore& master_semaphore; - std::vector<vk::DescriptorPool> pools; - vk::DescriptorPool* active_pool; + std::shared_mutex banks_mutex; + std::vector<DescriptorBankInfo> bank_infos; + std::vector<std::unique_ptr<DescriptorBank>> banks; }; } // namespace Vulkan
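
The DescriptorPool/DescriptorBank design declared above replaces the single ever-growing VKDescriptorPool: each pipeline requests a DescriptorAllocator sized from its shaders' Shader::Info, and Bank() reuses an existing DescriptorBank whenever that bank's per-type counts are a superset of the request and the total descriptor count ("score") differs by less than SCORE_THRESHOLD, so similarly shaped pipelines share VkDescriptorPool objects. A minimal, self-contained sketch of that matching heuristic, reduced to two descriptor categories with illustrative names rather than the actual yuzu types:

#include <cstddef>
#include <cstdlib>
#include <memory>
#include <vector>

// Illustrative stand-ins for DescriptorBankInfo/DescriptorBank; the real structs in the
// diff above track six descriptor categories plus a score.
struct BankInfo {
    int uniform_buffers{};
    int textures{};
    int score{}; // total number of descriptors requested

    bool IsSuperset(const BankInfo& subset) const noexcept {
        return uniform_buffers >= subset.uniform_buffers && textures >= subset.textures;
    }
};

struct Bank {
    BankInfo info;
    // The real DescriptorBank also owns the vk::DescriptorPool objects it allocates from.
};

constexpr int SCORE_THRESHOLD = 3; // mirrors the constant introduced above

// Return an existing bank that can serve `reqs`, or create a new one; this is the same
// lookup DescriptorPool::Bank() performs, minus the shared_mutex used for thread safety.
Bank& GetBank(std::vector<BankInfo>& infos, std::vector<std::unique_ptr<Bank>>& banks,
              const BankInfo& reqs) {
    for (std::size_t i = 0; i < infos.size(); ++i) {
        if (std::abs(infos[i].score - reqs.score) < SCORE_THRESHOLD && infos[i].IsSuperset(reqs)) {
            return *banks[i];
        }
    }
    infos.push_back(reqs);
    banks.push_back(std::make_unique<Bank>());
    banks.back()->info = reqs;
    return *banks.back();
}

DescriptorAllocator::Commit() then hands out one VkDescriptorSet per draw from the matched bank, growing it SETS_GROW_RATE sets at a time and only calling AllocatePool() again when the current VkDescriptorPool reports VK_ERROR_OUT_OF_POOL_MEMORY.
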
\ No newline at end of file diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index fc6dd83eb..18482e1d0 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -1,29 +1,58 @@ -// Copyright 2019 yuzu Emulator Project +// Copyright 2021 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. #include <algorithm> -#include <array> -#include <cstring> -#include <vector> +#include <span> -#include "common/common_types.h" -#include "common/microprofile.h" -#include "video_core/renderer_vulkan/fixed_pipeline_state.h" +#include <boost/container/small_vector.hpp> +#include <boost/container/static_vector.hpp> + +#include "common/bit_field.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" -#include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/pipeline_helper.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" -#include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/shader_notify.h" #include "video_core/vulkan_common/vulkan_device.h" -#include "video_core/vulkan_common/vulkan_wrapper.h" - -namespace Vulkan { -MICROPROFILE_DECLARE(Vulkan_PipelineCache); +#if defined(_MSC_VER) && defined(NDEBUG) +#define LAMBDA_FORCEINLINE [[msvc::forceinline]] +#else +#define LAMBDA_FORCEINLINE +#endif +namespace Vulkan { namespace { +using boost::container::small_vector; +using boost::container::static_vector; +using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; +using VideoCore::Surface::PixelFormat; +using VideoCore::Surface::PixelFormatFromDepthFormat; +using VideoCore::Surface::PixelFormatFromRenderTargetFormat; + +constexpr size_t NUM_STAGES = Maxwell::MaxShaderStage; +constexpr size_t MAX_IMAGE_ELEMENTS = 64; + +DescriptorLayoutBuilder MakeBuilder(const Device& device, std::span<const Shader::Info> infos) { + DescriptorLayoutBuilder builder{device}; + for (size_t index = 0; index < infos.size(); ++index) { + static constexpr std::array stages{ + VK_SHADER_STAGE_VERTEX_BIT, + VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, + VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, + VK_SHADER_STAGE_GEOMETRY_BIT, + VK_SHADER_STAGE_FRAGMENT_BIT, + }; + builder.Add(infos[index], stages.at(index)); + } + return builder; +} template <class StencilFace> VkStencilOpState GetStencilFaceState(const StencilFace& face) { @@ -39,15 +68,24 @@ VkStencilOpState GetStencilFaceState(const StencilFace& face) { } bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { - static constexpr std::array unsupported_topologies = { + static constexpr std::array unsupported_topologies{ VK_PRIMITIVE_TOPOLOGY_POINT_LIST, VK_PRIMITIVE_TOPOLOGY_LINE_LIST, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY, - VK_PRIMITIVE_TOPOLOGY_PATCH_LIST}; - return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies), - topology) == std::end(unsupported_topologies); + VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, + // VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT, + }; + return 
std::ranges::find(unsupported_topologies, topology) == unsupported_topologies.end(); +} + +bool IsLine(VkPrimitiveTopology topology) { + static constexpr std::array line_topologies{ + VK_PRIMITIVE_TOPOLOGY_LINE_LIST, VK_PRIMITIVE_TOPOLOGY_LINE_STRIP, + // VK_PRIMITIVE_TOPOLOGY_LINE_LOOP_EXT, + }; + return std::ranges::find(line_topologies, topology) == line_topologies.end(); } VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { @@ -59,8 +97,7 @@ VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { BitField<12, 3, Maxwell::ViewportSwizzle> w; }; const Swizzle unpacked{swizzle}; - - return { + return VkViewportSwizzleNV{ .x = MaxwellToVK::ViewportSwizzle(unpacked.x), .y = MaxwellToVK::ViewportSwizzle(unpacked.y), .z = MaxwellToVK::ViewportSwizzle(unpacked.z), @@ -68,193 +105,446 @@ VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { }; } -VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) { - switch (msaa_mode) { - case Tegra::Texture::MsaaMode::Msaa1x1: - return VK_SAMPLE_COUNT_1_BIT; - case Tegra::Texture::MsaaMode::Msaa2x1: - case Tegra::Texture::MsaaMode::Msaa2x1_D3D: - return VK_SAMPLE_COUNT_2_BIT; - case Tegra::Texture::MsaaMode::Msaa2x2: - case Tegra::Texture::MsaaMode::Msaa2x2_VC4: - case Tegra::Texture::MsaaMode::Msaa2x2_VC12: - return VK_SAMPLE_COUNT_4_BIT; - case Tegra::Texture::MsaaMode::Msaa4x2: - case Tegra::Texture::MsaaMode::Msaa4x2_D3D: - case Tegra::Texture::MsaaMode::Msaa4x2_VC8: - case Tegra::Texture::MsaaMode::Msaa4x2_VC24: - return VK_SAMPLE_COUNT_8_BIT; - case Tegra::Texture::MsaaMode::Msaa4x4: - return VK_SAMPLE_COUNT_16_BIT; - default: - UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode)); - return VK_SAMPLE_COUNT_1_BIT; +PixelFormat DecodeFormat(u8 encoded_format) { + const auto format{static_cast<Tegra::RenderTargetFormat>(encoded_format)}; + if (format == Tegra::RenderTargetFormat::NONE) { + return PixelFormat::Invalid; } + return PixelFormatFromRenderTargetFormat(format); } -} // Anonymous namespace +RenderPassKey MakeRenderPassKey(const FixedPipelineState& state) { + RenderPassKey key; + std::ranges::transform(state.color_formats, key.color_formats.begin(), DecodeFormat); + if (state.depth_enabled != 0) { + const auto depth_format{static_cast<Tegra::DepthFormat>(state.depth_format.Value())}; + key.depth_format = PixelFormatFromDepthFormat(depth_format); + } else { + key.depth_format = PixelFormat::Invalid; + } + key.samples = MaxwellToVK::MsaaMode(state.msaa_mode); + return key; +} -VKGraphicsPipeline::VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_, - const GraphicsPipelineCacheKey& key, - vk::Span<VkDescriptorSetLayoutBinding> bindings, - const SPIRVProgram& program, u32 num_color_buffers) - : device{device_}, scheduler{scheduler_}, cache_key{key}, hash{cache_key.Hash()}, - descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, - descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, - update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, - descriptor_template{CreateDescriptorUpdateTemplate(program)}, - modules(CreateShaderModules(program)), - pipeline(CreatePipeline(program, cache_key.renderpass, num_color_buffers)) {} - -VKGraphicsPipeline::~VKGraphicsPipeline() = default; - -VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() { - if (!descriptor_template) { - return {}; - } - const VkDescriptorSet set = descriptor_allocator.Commit(); - 
update_descriptor_queue.Send(*descriptor_template, set); - return set; +size_t NumAttachments(const FixedPipelineState& state) { + size_t num{}; + for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { + const auto format{static_cast<Tegra::RenderTargetFormat>(state.color_formats[index])}; + if (format != Tegra::RenderTargetFormat::NONE) { + num = index + 1; + } + } + return num; } -vk::DescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout( - vk::Span<VkDescriptorSetLayoutBinding> bindings) const { - const VkDescriptorSetLayoutCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .bindingCount = bindings.size(), - .pBindings = bindings.data(), - }; - return device.GetLogical().CreateDescriptorSetLayout(ci); +template <typename Spec> +bool Passes(const std::array<vk::ShaderModule, NUM_STAGES>& modules, + const std::array<Shader::Info, NUM_STAGES>& stage_infos) { + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (!Spec::enabled_stages[stage] && modules[stage]) { + return false; + } + const auto& info{stage_infos[stage]}; + if constexpr (!Spec::has_storage_buffers) { + if (!info.storage_buffers_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_texture_buffers) { + if (!info.texture_buffer_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_image_buffers) { + if (!info.image_buffer_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_images) { + if (!info.image_descriptors.empty()) { + return false; + } + } + } + return true; } -vk::PipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const { - const VkPipelineLayoutCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .setLayoutCount = 1, - .pSetLayouts = descriptor_set_layout.address(), - .pushConstantRangeCount = 0, - .pPushConstantRanges = nullptr, - }; - return device.GetLogical().CreatePipelineLayout(ci); +using ConfigureFuncPtr = void (*)(GraphicsPipeline*, bool); + +template <typename Spec, typename... 
Specs> +ConfigureFuncPtr FindSpec(const std::array<vk::ShaderModule, NUM_STAGES>& modules, + const std::array<Shader::Info, NUM_STAGES>& stage_infos) { + if constexpr (sizeof...(Specs) > 0) { + if (!Passes<Spec>(modules, stage_infos)) { + return FindSpec<Specs...>(modules, stage_infos); + } + } + return GraphicsPipeline::MakeConfigureSpecFunc<Spec>(); } -vk::DescriptorUpdateTemplateKHR VKGraphicsPipeline::CreateDescriptorUpdateTemplate( - const SPIRVProgram& program) const { - std::vector<VkDescriptorUpdateTemplateEntry> template_entries; - u32 binding = 0; - u32 offset = 0; - for (const auto& stage : program) { - if (stage) { - FillDescriptorUpdateTemplateEntries(stage->entries, binding, offset, template_entries); +struct SimpleVertexFragmentSpec { + static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, true}; + static constexpr bool has_storage_buffers = false; + static constexpr bool has_texture_buffers = false; + static constexpr bool has_image_buffers = false; + static constexpr bool has_images = false; +}; + +struct SimpleVertexSpec { + static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, false}; + static constexpr bool has_storage_buffers = false; + static constexpr bool has_texture_buffers = false; + static constexpr bool has_image_buffers = false; + static constexpr bool has_images = false; +}; + +struct DefaultSpec { + static constexpr std::array<bool, 5> enabled_stages{true, true, true, true, true}; + static constexpr bool has_storage_buffers = true; + static constexpr bool has_texture_buffers = true; + static constexpr bool has_image_buffers = true; + static constexpr bool has_images = true; +}; + +ConfigureFuncPtr ConfigureFunc(const std::array<vk::ShaderModule, NUM_STAGES>& modules, + const std::array<Shader::Info, NUM_STAGES>& infos) { + return FindSpec<SimpleVertexSpec, SimpleVertexFragmentSpec, DefaultSpec>(modules, infos); +} +} // Anonymous namespace + +GraphicsPipeline::GraphicsPipeline( + Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, + VKScheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_, + VideoCore::ShaderNotify* shader_notify, const Device& device_, DescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread, + RenderPassCache& render_pass_cache, const GraphicsPipelineCacheKey& key_, + std::array<vk::ShaderModule, NUM_STAGES> stages, + const std::array<const Shader::Info*, NUM_STAGES>& infos) + : key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, device{device_}, + texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, scheduler{scheduler_}, + update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { + if (shader_notify) { + shader_notify->MarkShaderBuilding(); + } + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + const Shader::Info* const info{infos[stage]}; + if (!info) { + continue; } + stage_infos[stage] = *info; + enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask; + std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); } - if (template_entries.empty()) { - // If the shader doesn't use descriptor sets, skip template creation. 
- return {}; + auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool] { + DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; + uses_push_descriptor = builder.CanUsePushDescriptor(); + descriptor_set_layout = builder.CreateDescriptorSetLayout(uses_push_descriptor); + if (!uses_push_descriptor) { + descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos); + } + const VkDescriptorSetLayout set_layout{*descriptor_set_layout}; + pipeline_layout = builder.CreatePipelineLayout(set_layout); + descriptor_update_template = + builder.CreateTemplate(set_layout, *pipeline_layout, uses_push_descriptor); + + const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))}; + Validate(); + MakePipeline(render_pass); + + std::lock_guard lock{build_mutex}; + is_built = true; + build_condvar.notify_one(); + if (shader_notify) { + shader_notify->MarkShaderComplete(); + } + }}; + if (worker_thread) { + worker_thread->QueueWork(std::move(func)); + } else { + func(); } + configure_func = ConfigureFunc(spv_modules, stage_infos); +} - const VkDescriptorUpdateTemplateCreateInfoKHR ci{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, - .pNext = nullptr, - .flags = 0, - .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()), - .pDescriptorUpdateEntries = template_entries.data(), - .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, - .descriptorSetLayout = *descriptor_set_layout, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .pipelineLayout = *layout, - .set = DESCRIPTOR_SET, - }; - return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci); +void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) { + transition_keys.push_back(transition->key); + transitions.push_back(transition); } -std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules( - const SPIRVProgram& program) const { - VkShaderModuleCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .codeSize = 0, - .pCode = nullptr, - }; +template <typename Spec> +void GraphicsPipeline::ConfigureImpl(bool is_indexed) { + std::array<ImageId, MAX_IMAGE_ELEMENTS> image_view_ids; + std::array<u32, MAX_IMAGE_ELEMENTS> image_view_indices; + std::array<VkSampler, MAX_IMAGE_ELEMENTS> samplers; + size_t sampler_index{}; + size_t image_index{}; + + texture_cache.SynchronizeGraphicsDescriptors(); + + buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes); + + const auto& regs{maxwell3d.regs}; + const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; + const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE { + const Shader::Info& info{stage_infos[stage]}; + buffer_cache.UnbindGraphicsStorageBuffers(stage); + if constexpr (Spec::has_storage_buffers) { + size_t ssbo_index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, + desc.cbuf_offset, desc.is_written); + ++ssbo_index; + } + } + const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; + const auto read_handle{[&](const auto& desc, u32 index) { + ASSERT(cbufs[desc.cbuf_index].enabled); + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset}; + if constexpr 
(std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> || + std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) { + if (desc.has_secondary) { + ASSERT(cbufs[desc.secondary_cbuf_index].enabled); + const u32 second_offset{desc.secondary_cbuf_offset + index_offset}; + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address + + second_offset}; + const u32 lhs_raw{gpu_memory.Read<u32>(addr)}; + const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)}; + const u32 raw{lhs_raw | rhs_raw}; + return TexturePair(raw, via_header_index); + } + } + return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); + }}; + const auto add_image{[&](const auto& desc) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_index++] = handle.first; + } + }}; + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + add_image(desc); + } + } + if constexpr (Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_image(desc); + } + } + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_index++] = handle.first; - std::vector<vk::ShaderModule> shader_modules; - shader_modules.reserve(Maxwell::MaxShaderStage); - for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) { - const auto& stage = program[i]; - if (!stage) { - continue; + Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; + samplers[sampler_index++] = sampler->Handle(); + } + } + if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + add_image(desc); + } } + }}; + if constexpr (Spec::enabled_stages[0]) { + config_stage(0); + } + if constexpr (Spec::enabled_stages[1]) { + config_stage(1); + } + if constexpr (Spec::enabled_stages[2]) { + config_stage(2); + } + if constexpr (Spec::enabled_stages[3]) { + config_stage(3); + } + if constexpr (Spec::enabled_stages[4]) { + config_stage(4); + } + const std::span indices_span(image_view_indices.data(), image_index); + texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + + ImageId* texture_buffer_index{image_view_ids.data()}; + const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { + size_t index{}; + const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>; + for (u32 i = 0; i < desc.count; ++i) { + bool is_written{false}; + if constexpr (is_image) { + is_written = desc.is_written; + } + ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; + buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written, is_image); + ++index; + ++texture_buffer_index; + } + }}; + buffer_cache.UnbindGraphicsTextureBuffers(stage); - device.SaveShader(stage->code); + const Shader::Info& info{stage_infos[stage]}; + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + add_buffer(desc); + } + } + if constexpr (Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_buffer(desc); + } + } + for (const auto& desc : info.texture_descriptors) { + texture_buffer_index += desc.count; + } + if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + 
texture_buffer_index += desc.count; + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + bind_stage_info(0); + } + if constexpr (Spec::enabled_stages[1]) { + bind_stage_info(1); + } + if constexpr (Spec::enabled_stages[2]) { + bind_stage_info(2); + } + if constexpr (Spec::enabled_stages[3]) { + bind_stage_info(3); + } + if constexpr (Spec::enabled_stages[4]) { + bind_stage_info(4); + } + + buffer_cache.UpdateGraphicsBuffers(is_indexed); + buffer_cache.BindHostGeometryBuffers(is_indexed); - ci.codeSize = stage->code.size() * sizeof(u32); - ci.pCode = stage->code.data(); - shader_modules.push_back(device.GetLogical().CreateShaderModule(ci)); + update_descriptor_queue.Acquire(); + + const VkSampler* samplers_it{samplers.data()}; + const ImageId* views_it{image_view_ids.data()}; + const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE { + buffer_cache.BindHostStageBuffers(stage); + PushImageDescriptors(stage_infos[stage], samplers_it, views_it, texture_cache, + update_descriptor_queue); + }}; + if constexpr (Spec::enabled_stages[0]) { + prepare_stage(0); + } + if constexpr (Spec::enabled_stages[1]) { + prepare_stage(1); } - return shader_modules; + if constexpr (Spec::enabled_stages[2]) { + prepare_stage(2); + } + if constexpr (Spec::enabled_stages[3]) { + prepare_stage(3); + } + if constexpr (Spec::enabled_stages[4]) { + prepare_stage(4); + } + ConfigureDraw(); } -vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, - VkRenderPass renderpass, - u32 num_color_buffers) const { - const auto& state = cache_key.fixed_state; - const auto& viewport_swizzles = state.viewport_swizzles; - - FixedPipelineState::DynamicState dynamic; - if (device.IsExtExtendedDynamicStateSupported()) { - // Insert dummy values, as long as they are valid they don't matter as extended dynamic - // state is ignored - dynamic.raw1 = 0; - dynamic.raw2 = 0; - dynamic.vertex_strides.fill(0); - } else { - dynamic = state.dynamic_state; - } - - std::vector<VkVertexInputBindingDescription> vertex_bindings; - std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors; - for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - const bool instanced = state.binding_divisors[index] != 0; - const auto rate = instanced ? 
VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; - vertex_bindings.push_back({ - .binding = static_cast<u32>(index), - .stride = dynamic.vertex_strides[index], - .inputRate = rate, +void GraphicsPipeline::ConfigureDraw() { + texture_cache.UpdateRenderTargets(false); + scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); + + if (!is_built.load(std::memory_order::relaxed)) { + // Wait for the pipeline to be built + scheduler.Record([this](vk::CommandBuffer) { + std::unique_lock lock{build_mutex}; + build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); }); - if (instanced) { - vertex_binding_divisors.push_back({ - .binding = static_cast<u32>(index), - .divisor = state.binding_divisors[index], - }); - } } + const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)}; + const void* const descriptor_data{update_descriptor_queue.UpdateData()}; + scheduler.Record([this, descriptor_data, bind_pipeline](vk::CommandBuffer cmdbuf) { + if (bind_pipeline) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); + } + if (!descriptor_set_layout) { + return; + } + if (uses_push_descriptor) { + cmdbuf.PushDescriptorSetWithTemplateKHR(*descriptor_update_template, *pipeline_layout, + 0, descriptor_data); + } else { + const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; + const vk::Device& dev{device.GetLogical()}; + dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, + descriptor_set, nullptr); + } + }); +} - std::vector<VkVertexInputAttributeDescription> vertex_attributes; - const auto& input_attributes = program[0]->entries.attributes; - for (std::size_t index = 0; index < state.attributes.size(); ++index) { - const auto& attribute = state.attributes[index]; - if (!attribute.enabled) { - continue; +void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { + FixedPipelineState::DynamicState dynamic{}; + if (!key.state.extended_dynamic_state) { + dynamic = key.state.dynamic_state; + } + static_vector<VkVertexInputBindingDescription, 32> vertex_bindings; + static_vector<VkVertexInputBindingDivisorDescriptionEXT, 32> vertex_binding_divisors; + static_vector<VkVertexInputAttributeDescription, 32> vertex_attributes; + if (key.state.dynamic_vertex_input) { + for (size_t index = 0; index < key.state.attributes.size(); ++index) { + const u32 type = key.state.DynamicAttributeType(index); + if (!stage_infos[0].loads.Generic(index) || type == 0) { + continue; + } + vertex_attributes.push_back({ + .location = static_cast<u32>(index), + .binding = 0, + .format = type == 1 ? VK_FORMAT_R32_SFLOAT + : type == 2 ? VK_FORMAT_R32_SINT : VK_FORMAT_R32_UINT, + .offset = 0, + }); } - if (!input_attributes.contains(static_cast<u32>(index))) { - // Skip attributes not used by the vertex shaders. - continue; + if (!vertex_attributes.empty()) { + vertex_bindings.push_back({ + .binding = 0, + .stride = 4, + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX, + }); + } + } else { + for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + const bool instanced = key.state.binding_divisors[index] != 0; + const auto rate = + instanced ? 
VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; + vertex_bindings.push_back({ + .binding = static_cast<u32>(index), + .stride = dynamic.vertex_strides[index], + .inputRate = rate, + }); + if (instanced) { + vertex_binding_divisors.push_back({ + .binding = static_cast<u32>(index), + .divisor = key.state.binding_divisors[index], + }); + } + } + for (size_t index = 0; index < key.state.attributes.size(); ++index) { + const auto& attribute = key.state.attributes[index]; + if (!attribute.enabled || !stage_infos[0].loads.Generic(index)) { + continue; + } + vertex_attributes.push_back({ + .location = static_cast<u32>(index), + .binding = attribute.buffer, + .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), + .offset = attribute.offset, + }); } - vertex_attributes.push_back({ - .location = static_cast<u32>(index), - .binding = attribute.buffer, - .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), - .offset = attribute.offset, - }); } - VkPipelineVertexInputStateCreateInfo vertex_input_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, .pNext = nullptr, @@ -264,7 +554,6 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, .vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()), .pVertexAttributeDescriptions = vertex_attributes.data(), }; - const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT, .pNext = nullptr, @@ -274,78 +563,113 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, if (!vertex_binding_divisors.empty()) { vertex_input_ci.pNext = &input_divisor_ci; } - - const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); + auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, key.state.topology); + if (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST) { + if (!spv_modules[1] && !spv_modules[2]) { + LOG_WARNING(Render_Vulkan, "Patch topology used without tessellation, using points"); + input_assembly_topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST; + } + } const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .topology = MaxwellToVK::PrimitiveTopology(device, state.topology), - .primitiveRestartEnable = state.primitive_restart_enable != 0 && + .topology = input_assembly_topology, + .primitiveRestartEnable = key.state.primitive_restart_enable != 0 && SupportsPrimitiveRestart(input_assembly_topology), }; - const VkPipelineTessellationStateCreateInfo tessellation_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .patchControlPoints = state.patch_control_points_minus_one.Value() + 1, - }; - - VkPipelineViewportStateCreateInfo viewport_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .viewportCount = Maxwell::NumViewports, - .pViewports = nullptr, - .scissorCount = Maxwell::NumViewports, - .pScissors = nullptr, + .patchControlPoints = key.state.patch_control_points_minus_one.Value() + 1, }; std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles; - std::ranges::transform(viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); - VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ + std::ranges::transform(key.state.viewport_swizzles, swizzles.begin(), 
UnpackViewportSwizzle); + const VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, .pNext = nullptr, .flags = 0, .viewportCount = Maxwell::NumViewports, .pViewportSwizzles = swizzles.data(), }; - if (device.IsNvViewportSwizzleSupported()) { - viewport_ci.pNext = &swizzle_ci; - } + const VkPipelineViewportStateCreateInfo viewport_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pNext = device.IsNvViewportSwizzleSupported() ? &swizzle_ci : nullptr, + .flags = 0, + .viewportCount = Maxwell::NumViewports, + .pViewports = nullptr, + .scissorCount = Maxwell::NumViewports, + .pScissors = nullptr, + }; - const VkPipelineRasterizationStateCreateInfo rasterization_ci{ + VkPipelineRasterizationStateCreateInfo rasterization_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, .depthClampEnable = - static_cast<VkBool32>(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), + static_cast<VkBool32>(key.state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), .rasterizerDiscardEnable = - static_cast<VkBool32>(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), - .polygonMode = VK_POLYGON_MODE_FILL, + static_cast<VkBool32>(key.state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), + .polygonMode = + MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(key.state.polygon_mode)), .cullMode = static_cast<VkCullModeFlags>( dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), - .depthBiasEnable = state.depth_bias_enable, + .depthBiasEnable = key.state.depth_bias_enable, .depthBiasConstantFactor = 0.0f, .depthBiasClamp = 0.0f, .depthBiasSlopeFactor = 0.0f, .lineWidth = 1.0f, }; + VkPipelineRasterizationLineStateCreateInfoEXT line_state{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .lineRasterizationMode = key.state.smooth_lines != 0 + ? VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT + : VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT, + .stippledLineEnable = VK_FALSE, // TODO + .lineStippleFactor = 0, + .lineStipplePattern = 0, + }; + VkPipelineRasterizationConservativeStateCreateInfoEXT conservative_raster{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .flags = 0, + .conservativeRasterizationMode = key.state.conservative_raster_enable != 0 + ? VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT + : VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT, + .extraPrimitiveOverestimationSize = 0.0f, + }; + VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .provokingVertexMode = key.state.provoking_vertex_last != 0 + ? 
VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT + : VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT, + }; + if (IsLine(input_assembly_topology) && device.IsExtLineRasterizationSupported()) { + line_state.pNext = std::exchange(rasterization_ci.pNext, &line_state); + } + if (device.IsExtConservativeRasterizationSupported()) { + conservative_raster.pNext = std::exchange(rasterization_ci.pNext, &conservative_raster); + } + if (device.IsExtProvokingVertexSupported()) { + provoking_vertex.pNext = std::exchange(rasterization_ci.pNext, &provoking_vertex); + } const VkPipelineMultisampleStateCreateInfo multisample_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .rasterizationSamples = ConvertMsaaMode(state.msaa_mode), + .rasterizationSamples = MaxwellToVK::MsaaMode(key.state.msaa_mode), .sampleShadingEnable = VK_FALSE, .minSampleShading = 0.0f, .pSampleMask = nullptr, .alphaToCoverageEnable = VK_FALSE, .alphaToOneEnable = VK_FALSE, }; - const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, .pNext = nullptr, @@ -355,32 +679,32 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, .depthCompareOp = dynamic.depth_test_enable ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) : VK_COMPARE_OP_ALWAYS, - .depthBoundsTestEnable = dynamic.depth_bounds_enable, + .depthBoundsTestEnable = dynamic.depth_bounds_enable && device.IsDepthBoundsSupported(), .stencilTestEnable = dynamic.stencil_enable, .front = GetStencilFaceState(dynamic.front), .back = GetStencilFaceState(dynamic.back), .minDepthBounds = 0.0f, .maxDepthBounds = 0.0f, }; - - std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; - for (std::size_t index = 0; index < num_color_buffers; ++index) { - static constexpr std::array COMPONENT_TABLE{ + if (dynamic.depth_bounds_enable && !device.IsDepthBoundsSupported()) { + LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported"); + } + static_vector<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; + const size_t num_attachments{NumAttachments(key.state)}; + for (size_t index = 0; index < num_attachments; ++index) { + static constexpr std::array mask_table{ VK_COLOR_COMPONENT_R_BIT, VK_COLOR_COMPONENT_G_BIT, VK_COLOR_COMPONENT_B_BIT, VK_COLOR_COMPONENT_A_BIT, }; - const auto& blend = state.attachments[index]; - - VkColorComponentFlags color_components = 0; - for (std::size_t i = 0; i < COMPONENT_TABLE.size(); ++i) { - if (blend.Mask()[i]) { - color_components |= COMPONENT_TABLE[i]; - } + const auto& blend{key.state.attachments[index]}; + const std::array mask{blend.Mask()}; + VkColorComponentFlags write_mask{}; + for (size_t i = 0; i < mask_table.size(); ++i) { + write_mask |= mask[i] ? 
mask_table[i] : 0; } - - cb_attachments[index] = { + cb_attachments.push_back({ .blendEnable = blend.enable != 0, .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()), .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()), @@ -388,28 +712,27 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()), .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()), .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()), - .colorWriteMask = color_components, - }; + .colorWriteMask = write_mask, + }); } - const VkPipelineColorBlendStateCreateInfo color_blend_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, .logicOpEnable = VK_FALSE, .logicOp = VK_LOGIC_OP_COPY, - .attachmentCount = num_color_buffers, + .attachmentCount = static_cast<u32>(cb_attachments.size()), .pAttachments = cb_attachments.data(), .blendConstants = {}, }; - - std::vector dynamic_states{ + static_vector<VkDynamicState, 19> dynamic_states{ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, + VK_DYNAMIC_STATE_LINE_WIDTH, }; - if (device.IsExtExtendedDynamicStateSupported()) { + if (key.state.extended_dynamic_state) { static constexpr std::array extended{ VK_DYNAMIC_STATE_CULL_MODE_EXT, VK_DYNAMIC_STATE_FRONT_FACE_EXT, @@ -421,9 +744,11 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, VK_DYNAMIC_STATE_STENCIL_OP_EXT, }; + if (key.state.dynamic_vertex_input) { + dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_EXT); + } dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); } - const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, .pNext = nullptr, @@ -431,34 +756,33 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, .dynamicStateCount = static_cast<u32>(dynamic_states.size()), .pDynamicStates = dynamic_states.data(), }; - - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + [[maybe_unused]] const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, .pNext = nullptr, .requiredSubgroupSize = GuestWarpSize, }; - - std::vector<VkPipelineShaderStageCreateInfo> shader_stages; - std::size_t module_index = 0; - for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - if (!program[stage]) { + static_vector<VkPipelineShaderStageCreateInfo, 5> shader_stages; + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + if (!spv_modules[stage]) { continue; } - - VkPipelineShaderStageCreateInfo& stage_ci = shader_stages.emplace_back(); - stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - stage_ci.pNext = nullptr; - stage_ci.flags = 0; - stage_ci.stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage)); - stage_ci.module = *modules[module_index++]; - stage_ci.pName = "main"; - stage_ci.pSpecializationInfo = nullptr; - + [[maybe_unused]] auto& stage_ci = + shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{ + .sType = 
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = MaxwellToVK::ShaderStage(Shader::StageFromIndex(stage)), + .module = *spv_modules[stage], + .pName = "main", + .pSpecializationInfo = nullptr, + }); + /* if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { stage_ci.pNext = &subgroup_size_ci; } + */ } - return device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{ + pipeline = device.GetLogical().CreateGraphicsPipeline({ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -473,12 +797,31 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, .pDepthStencilState = &depth_stencil_ci, .pColorBlendState = &color_blend_ci, .pDynamicState = &dynamic_state_ci, - .layout = *layout, - .renderPass = renderpass, + .layout = *pipeline_layout, + .renderPass = render_pass, .subpass = 0, .basePipelineHandle = nullptr, .basePipelineIndex = 0, }); } +void GraphicsPipeline::Validate() { + size_t num_images{}; + for (const auto& info : stage_infos) { + for (const auto& desc : info.texture_buffer_descriptors) { + num_images += desc.count; + } + for (const auto& desc : info.image_buffer_descriptors) { + num_images += desc.count; + } + for (const auto& desc : info.texture_descriptors) { + num_images += desc.count; + } + for (const auto& desc : info.image_descriptors) { + num_images += desc.count; + } + } + ASSERT(num_images <= MAX_IMAGE_ELEMENTS); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 8b6a98fe0..2bd48d697 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -1,30 +1,36 @@ -// Copyright 2019 yuzu Emulator Project +// Copyright 2021 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
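
The SimpleVertexSpec/SimpleVertexFragmentSpec/DefaultSpec structs together with Passes() and FindSpec() in vk_graphics_pipeline.cpp above select one template instantiation of ConfigureImpl per pipeline at creation time: every Spec flag is a compile-time constant, so the per-draw Configure path carries no branches or loops for descriptor kinds the shaders never use. A self-contained sketch of that dispatch pattern, with made-up feature flags and a trimmed-down Pipeline standing in for the yuzu types:

#include <array>
#include <cstdio>

struct Pipeline;
using ConfigureFuncPtr = void (*)(Pipeline*, bool is_indexed);

// Illustrative per-stage usage flags; the real Shader::Info carries descriptor lists instead.
struct StageInfo {
    bool uses_storage_buffers{};
    bool uses_images{};
};

struct Pipeline {
    std::array<StageInfo, 5> stage_infos{};
    ConfigureFuncPtr configure_func{};

    // The real ConfigureImpl binds buffers and textures; here each branch just reports itself.
    template <typename Spec>
    void ConfigureImpl(bool is_indexed) {
        if constexpr (Spec::has_storage_buffers) {
            std::puts("binding storage buffers");
        }
        if constexpr (Spec::has_images) {
            std::puts("binding images");
        }
        std::printf("drawing (indexed=%d)\n", is_indexed ? 1 : 0);
    }

    template <typename Spec>
    static ConfigureFuncPtr MakeConfigureSpecFunc() {
        return [](Pipeline* pl, bool is_indexed) { pl->ConfigureImpl<Spec>(is_indexed); };
    }
};

// Candidate specializations, cheapest first; the last one must accept everything.
struct SimpleSpec {
    static constexpr bool has_storage_buffers = false;
    static constexpr bool has_images = false;
};
struct DefaultSpec {
    static constexpr bool has_storage_buffers = true;
    static constexpr bool has_images = true;
};

// A Spec "passes" when it supports every feature the shader stages actually use.
template <typename Spec>
bool Passes(const std::array<StageInfo, 5>& infos) {
    for (const StageInfo& info : infos) {
        if (!Spec::has_storage_buffers && info.uses_storage_buffers) {
            return false;
        }
        if (!Spec::has_images && info.uses_images) {
            return false;
        }
    }
    return true;
}

// Walk the Spec list and return the first match; the last Spec is taken unconditionally,
// mirroring FindSpec in the diff above.
template <typename Spec, typename... Specs>
ConfigureFuncPtr FindSpec(const std::array<StageInfo, 5>& infos) {
    if constexpr (sizeof...(Specs) > 0) {
        if (!Passes<Spec>(infos)) {
            return FindSpec<Specs...>(infos);
        }
    }
    return Pipeline::MakeConfigureSpecFunc<Spec>();
}

int main() {
    Pipeline pipeline;
    pipeline.stage_infos[4].uses_images = true; // e.g. the fragment stage samples images
    pipeline.configure_func = FindSpec<SimpleSpec, DefaultSpec>(pipeline.stage_infos);
    pipeline.configure_func(&pipeline, true); // invokes the DefaultSpec instantiation
}

In the real code the chosen pointer is stored in configure_func and invoked through Configure(bool is_indexed), with DefaultSpec acting as the unconditional fallback at the end of the Spec list passed to ConfigureFunc().
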
#pragma once +#include <algorithm> #include <array> -#include <optional> -#include <vector> +#include <atomic> +#include <condition_variable> +#include <mutex> +#include <type_traits> -#include "common/common_types.h" +#include "common/thread_worker.h" +#include "shader_recompiler/shader_info.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/vulkan_common/vulkan_wrapper.h" -namespace Vulkan { +namespace VideoCore { +class ShaderNotify; +} -using Maxwell = Tegra::Engines::Maxwell3D::Regs; +namespace Vulkan { struct GraphicsPipelineCacheKey { - VkRenderPass renderpass; - std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders; - FixedPipelineState fixed_state; + std::array<u64, 6> unique_hashes; + FixedPipelineState state; - std::size_t Hash() const noexcept; + size_t Hash() const noexcept; bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; @@ -32,72 +38,115 @@ struct GraphicsPipelineCacheKey { return !operator==(rhs); } - std::size_t Size() const noexcept { - return sizeof(renderpass) + sizeof(shaders) + fixed_state.Size(); + size_t Size() const noexcept { + return sizeof(unique_hashes) + state.Size(); } }; static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>); static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>); +} // namespace Vulkan + +namespace std { +template <> +struct hash<Vulkan::GraphicsPipelineCacheKey> { + size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std + +namespace Vulkan { + class Device; -class VKDescriptorPool; +class RenderPassCache; class VKScheduler; class VKUpdateDescriptorQueue; -using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderStage>; +class GraphicsPipeline { + static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; -class VKGraphicsPipeline final { public: - explicit VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue_, - const GraphicsPipelineCacheKey& key, - vk::Span<VkDescriptorSetLayoutBinding> bindings, - const SPIRVProgram& program, u32 num_color_buffers); - ~VKGraphicsPipeline(); - - VkDescriptorSet CommitDescriptorSet(); - - VkPipeline GetHandle() const { - return *pipeline; + explicit GraphicsPipeline( + Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, + VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache, + VideoCore::ShaderNotify* shader_notify, const Device& device, + DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, + Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, + const GraphicsPipelineCacheKey& key, std::array<vk::ShaderModule, NUM_STAGES> stages, + const std::array<const Shader::Info*, NUM_STAGES>& infos); + + GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete; + GraphicsPipeline(GraphicsPipeline&&) noexcept = delete; + + GraphicsPipeline& operator=(const GraphicsPipeline&) = delete; + GraphicsPipeline(const GraphicsPipeline&) = delete; + + void 
AddTransition(GraphicsPipeline* transition); + + void Configure(bool is_indexed) { + configure_func(this, is_indexed); } - VkPipelineLayout GetLayout() const { - return *layout; + [[nodiscard]] GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept { + if (key == current_key) { + return this; + } + const auto it{std::find(transition_keys.begin(), transition_keys.end(), current_key)}; + return it != transition_keys.end() ? transitions[std::distance(transition_keys.begin(), it)] + : nullptr; } - GraphicsPipelineCacheKey GetCacheKey() const { - return cache_key; + [[nodiscard]] bool IsBuilt() const noexcept { + return is_built.load(std::memory_order::relaxed); } -private: - vk::DescriptorSetLayout CreateDescriptorSetLayout( - vk::Span<VkDescriptorSetLayoutBinding> bindings) const; + template <typename Spec> + static auto MakeConfigureSpecFunc() { + return [](GraphicsPipeline* pl, bool is_indexed) { pl->ConfigureImpl<Spec>(is_indexed); }; + } - vk::PipelineLayout CreatePipelineLayout() const; +private: + template <typename Spec> + void ConfigureImpl(bool is_indexed); - vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( - const SPIRVProgram& program) const; + void ConfigureDraw(); - std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const; + void MakePipeline(VkRenderPass render_pass); - vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass, - u32 num_color_buffers) const; + void Validate(); + const GraphicsPipelineCacheKey key; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::MemoryManager& gpu_memory; const Device& device; + TextureCache& texture_cache; + BufferCache& buffer_cache; VKScheduler& scheduler; - const GraphicsPipelineCacheKey cache_key; - const u64 hash; + VKUpdateDescriptorQueue& update_descriptor_queue; + + void (*configure_func)(GraphicsPipeline*, bool){}; + + std::vector<GraphicsPipelineCacheKey> transition_keys; + std::vector<GraphicsPipeline*> transitions; + + std::array<vk::ShaderModule, NUM_STAGES> spv_modules; + + std::array<Shader::Info, NUM_STAGES> stage_infos; + std::array<u32, 5> enabled_uniform_buffer_masks{}; + VideoCommon::UniformBufferSizes uniform_buffer_sizes{}; vk::DescriptorSetLayout descriptor_set_layout; DescriptorAllocator descriptor_allocator; - VKUpdateDescriptorQueue& update_descriptor_queue; - vk::PipelineLayout layout; - vk::DescriptorUpdateTemplateKHR descriptor_template; - std::vector<vk::ShaderModule> modules; - + vk::PipelineLayout pipeline_layout; + vk::DescriptorUpdateTemplateKHR descriptor_update_template; vk::Pipeline pipeline; + + std::condition_variable build_condvar; + std::mutex build_mutex; + std::atomic_bool is_built{false}; + bool uses_push_descriptor{false}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h index ee3cd35d0..4f8688118 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.h +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h @@ -39,9 +39,9 @@ public: return KnownGpuTick() >= tick; } - /// Advance to the logical tick. 
- void NextTick() noexcept { - ++current_tick; + /// Advance to the logical tick and return the old one + [[nodiscard]] u64 NextTick() noexcept { + return current_tick.fetch_add(1, std::memory_order::relaxed); } /// Refresh the known GPU tick diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 8991505ca..57b163247 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -4,444 +4,613 @@ #include <algorithm> #include <cstddef> +#include <fstream> #include <memory> +#include <thread> #include <vector> #include "common/bit_cast.h" #include "common/cityhash.h" +#include "common/fs/fs.h" +#include "common/fs/path_util.h" #include "common/microprofile.h" +#include "common/thread_worker.h" #include "core/core.h" #include "core/memory.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/frontend/maxwell/translate_program.h" +#include "shader_recompiler/program_header.h" +#include "video_core/dirty_flags.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" +#include "video_core/renderer_vulkan/pipeline_helper.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/shader/compiler_settings.h" -#include "video_core/shader/memory_util.h" #include "video_core/shader_cache.h" +#include "video_core/shader_environment.h" #include "video_core/shader_notify.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { - MICROPROFILE_DECLARE(Vulkan_PipelineCache); -using Tegra::Engines::ShaderType; -using VideoCommon::Shader::GetShaderAddress; -using VideoCommon::Shader::GetShaderCode; -using VideoCommon::Shader::KERNEL_MAIN_OFFSET; -using VideoCommon::Shader::ProgramCode; -using VideoCommon::Shader::STAGE_MAIN_OFFSET; - namespace { +using Shader::Backend::SPIRV::EmitSPIRV; +using Shader::Maxwell::MergeDualVertexPrograms; +using Shader::Maxwell::TranslateProgram; +using VideoCommon::ComputeEnvironment; +using VideoCommon::FileEnvironment; +using VideoCommon::GenericEnvironment; +using VideoCommon::GraphicsEnvironment; + +constexpr u32 CACHE_VERSION = 5; + +template <typename Container> +auto MakeSpan(Container& container) { + return std::span(container.data(), container.size()); +} -constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; -constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; -constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; -constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; -constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; -constexpr VkDescriptorType 
STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - -constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ - .depth = VideoCommon::Shader::CompileDepth::FullDecompile, - .disable_else_derivation = true, -}; - -constexpr std::size_t GetStageFromProgram(std::size_t program) { - return program == 0 ? 0 : program - 1; +Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp comparison) { + switch (comparison) { + case Maxwell::ComparisonOp::Never: + case Maxwell::ComparisonOp::NeverOld: + return Shader::CompareFunction::Never; + case Maxwell::ComparisonOp::Less: + case Maxwell::ComparisonOp::LessOld: + return Shader::CompareFunction::Less; + case Maxwell::ComparisonOp::Equal: + case Maxwell::ComparisonOp::EqualOld: + return Shader::CompareFunction::Equal; + case Maxwell::ComparisonOp::LessEqual: + case Maxwell::ComparisonOp::LessEqualOld: + return Shader::CompareFunction::LessThanEqual; + case Maxwell::ComparisonOp::Greater: + case Maxwell::ComparisonOp::GreaterOld: + return Shader::CompareFunction::Greater; + case Maxwell::ComparisonOp::NotEqual: + case Maxwell::ComparisonOp::NotEqualOld: + return Shader::CompareFunction::NotEqual; + case Maxwell::ComparisonOp::GreaterEqual: + case Maxwell::ComparisonOp::GreaterEqualOld: + return Shader::CompareFunction::GreaterThanEqual; + case Maxwell::ComparisonOp::Always: + case Maxwell::ComparisonOp::AlwaysOld: + return Shader::CompareFunction::Always; + } + UNIMPLEMENTED_MSG("Unimplemented comparison op={}", comparison); + return {}; } -constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) { - return static_cast<ShaderType>(GetStageFromProgram(static_cast<std::size_t>(program))); +Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { + if (attr.enabled == 0) { + return Shader::AttributeType::Disabled; + } + switch (attr.Type()) { + case Maxwell::VertexAttribute::Type::SignedNorm: + case Maxwell::VertexAttribute::Type::UnsignedNorm: + case Maxwell::VertexAttribute::Type::UnsignedScaled: + case Maxwell::VertexAttribute::Type::SignedScaled: + case Maxwell::VertexAttribute::Type::Float: + return Shader::AttributeType::Float; + case Maxwell::VertexAttribute::Type::SignedInt: + return Shader::AttributeType::SignedInt; + case Maxwell::VertexAttribute::Type::UnsignedInt: + return Shader::AttributeType::UnsignedInt; + } + return Shader::AttributeType::Float; } -ShaderType GetShaderType(Maxwell::ShaderProgram program) { - switch (program) { - case Maxwell::ShaderProgram::VertexB: - return ShaderType::Vertex; - case Maxwell::ShaderProgram::TesselationControl: - return ShaderType::TesselationControl; - case Maxwell::ShaderProgram::TesselationEval: - return ShaderType::TesselationEval; - case Maxwell::ShaderProgram::Geometry: - return ShaderType::Geometry; - case Maxwell::ShaderProgram::Fragment: - return ShaderType::Fragment; - default: - UNIMPLEMENTED_MSG("program={}", program); - return ShaderType::Vertex; +Shader::AttributeType AttributeType(const FixedPipelineState& state, size_t index) { + switch (state.DynamicAttributeType(index)) { + case 0: + return Shader::AttributeType::Disabled; + case 1: + return Shader::AttributeType::Float; + case 2: + return Shader::AttributeType::SignedInt; + case 3: + return Shader::AttributeType::UnsignedInt; } + return Shader::AttributeType::Disabled; } -template <VkDescriptorType descriptor_type, class Container> -void AddBindings(std::vector<VkDescriptorSetLayoutBinding>& bindings, u32& binding, - VkShaderStageFlags stage_flags, const Container& 
container) { - const u32 num_entries = static_cast<u32>(std::size(container)); - for (std::size_t i = 0; i < num_entries; ++i) { - u32 count = 1; - if constexpr (descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { - // Combined image samplers can be arrayed. - count = container[i].size; +Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> programs, + const GraphicsPipelineCacheKey& key, + const Shader::IR::Program& program, + const Shader::IR::Program* previous_program) { + Shader::RuntimeInfo info; + if (previous_program) { + info.previous_stage_stores = previous_program->info.stores; + if (previous_program->is_geometry_passthrough) { + info.previous_stage_stores.mask |= previous_program->info.passthrough.mask; } - bindings.push_back({ - .binding = binding++, - .descriptorType = descriptor_type, - .descriptorCount = count, - .stageFlags = stage_flags, - .pImmutableSamplers = nullptr, - }); + } else { + info.previous_stage_stores.mask.set(); + } + const Shader::Stage stage{program.stage}; + const bool has_geometry{key.unique_hashes[4] != 0 && !programs[4].is_geometry_passthrough}; + const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; + const float point_size{Common::BitCast<float>(key.state.point_size)}; + switch (stage) { + case Shader::Stage::VertexB: + if (!has_geometry) { + if (key.state.topology == Maxwell::PrimitiveTopology::Points) { + info.fixed_state_point_size = point_size; + } + if (key.state.xfb_enabled) { + info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); + } + info.convert_depth_mode = gl_ndc; + } + if (key.state.dynamic_vertex_input) { + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + info.generic_input_types[index] = AttributeType(key.state, index); + } + } else { + std::ranges::transform(key.state.attributes, info.generic_input_types.begin(), + &CastAttributeType); + } + break; + case Shader::Stage::TessellationEval: + // We have to flip tessellation clockwise for some reason... 
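// The tessellation fields packed into FixedPipelineState are decoded right below: the
// winding bit is inverted (tess_clockwise is set when the packed bit reads 0), and the
// primitive/spacing lambdas map raw register values onto the recompiler's enums. A
// standalone sketch of that decode, with register encodings assumed (0/1/2 in
// declaration order) and illustrative enum/struct names, not the project's types:
enum class ExampleTessPrimitive { Isolines, Triangles, Quads };
enum class ExampleTessSpacing { Equal, FractionalOdd, FractionalEven };

struct ExampleTessState {
    bool clockwise;
    ExampleTessPrimitive primitive;
    ExampleTessSpacing spacing;
};

constexpr ExampleTessState DecodeTessState(unsigned winding_bit, unsigned primitive_raw,
                                           unsigned spacing_raw) {
    return ExampleTessState{
        .clockwise = winding_bit == 0, // flipped relative to the packed register value
        .primitive = primitive_raw <= 2 ? static_cast<ExampleTessPrimitive>(primitive_raw)
                                        : ExampleTessPrimitive::Triangles,
        .spacing = spacing_raw <= 2 ? static_cast<ExampleTessSpacing>(spacing_raw)
                                    : ExampleTessSpacing::Equal,
    };
}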
+ info.tess_clockwise = key.state.tessellation_clockwise == 0; + info.tess_primitive = [&key] { + const u32 raw{key.state.tessellation_primitive.Value()}; + switch (static_cast<Maxwell::TessellationPrimitive>(raw)) { + case Maxwell::TessellationPrimitive::Isolines: + return Shader::TessPrimitive::Isolines; + case Maxwell::TessellationPrimitive::Triangles: + return Shader::TessPrimitive::Triangles; + case Maxwell::TessellationPrimitive::Quads: + return Shader::TessPrimitive::Quads; + } + UNREACHABLE(); + return Shader::TessPrimitive::Triangles; + }(); + info.tess_spacing = [&] { + const u32 raw{key.state.tessellation_spacing}; + switch (static_cast<Maxwell::TessellationSpacing>(raw)) { + case Maxwell::TessellationSpacing::Equal: + return Shader::TessSpacing::Equal; + case Maxwell::TessellationSpacing::FractionalOdd: + return Shader::TessSpacing::FractionalOdd; + case Maxwell::TessellationSpacing::FractionalEven: + return Shader::TessSpacing::FractionalEven; + } + UNREACHABLE(); + return Shader::TessSpacing::Equal; + }(); + break; + case Shader::Stage::Geometry: + if (program.output_topology == Shader::OutputTopology::PointList) { + info.fixed_state_point_size = point_size; + } + if (key.state.xfb_enabled != 0) { + info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); + } + info.convert_depth_mode = gl_ndc; + break; + case Shader::Stage::Fragment: + info.alpha_test_func = MaxwellToCompareFunction( + key.state.UnpackComparisonOp(key.state.alpha_test_func.Value())); + info.alpha_test_reference = Common::BitCast<float>(key.state.alpha_test_ref); + break; + default: + break; + } + switch (key.state.topology) { + case Maxwell::PrimitiveTopology::Points: + info.input_topology = Shader::InputTopology::Points; + break; + case Maxwell::PrimitiveTopology::Lines: + case Maxwell::PrimitiveTopology::LineLoop: + case Maxwell::PrimitiveTopology::LineStrip: + info.input_topology = Shader::InputTopology::Lines; + break; + case Maxwell::PrimitiveTopology::Triangles: + case Maxwell::PrimitiveTopology::TriangleStrip: + case Maxwell::PrimitiveTopology::TriangleFan: + case Maxwell::PrimitiveTopology::Quads: + case Maxwell::PrimitiveTopology::QuadStrip: + case Maxwell::PrimitiveTopology::Polygon: + case Maxwell::PrimitiveTopology::Patches: + info.input_topology = Shader::InputTopology::Triangles; + break; + case Maxwell::PrimitiveTopology::LinesAdjacency: + case Maxwell::PrimitiveTopology::LineStripAdjacency: + info.input_topology = Shader::InputTopology::LinesAdjacency; + break; + case Maxwell::PrimitiveTopology::TrianglesAdjacency: + case Maxwell::PrimitiveTopology::TriangleStripAdjacency: + info.input_topology = Shader::InputTopology::TrianglesAdjacency; + break; } + info.force_early_z = key.state.early_z != 0; + info.y_negate = key.state.y_negate != 0; + return info; } +} // Anonymous namespace -u32 FillDescriptorLayout(const ShaderEntries& entries, - std::vector<VkDescriptorSetLayoutBinding>& bindings, - Maxwell::ShaderProgram program_type, u32 base_binding) { - const ShaderType stage = GetStageFromProgram(program_type); - const VkShaderStageFlags flags = MaxwellToVK::ShaderStage(stage); - - u32 binding = base_binding; - AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers); - AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers); - AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.uniform_texels); - AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers); - 
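// The pipeline cache keys defined just below hash and compare their raw bytes
// (Common::CityHash64 plus std::memcmp), which is only safe because the keys are
// trivially copyable and fully initialized, padding included. A minimal sketch of the
// same pattern using FNV-1a in place of CityHash; ExampleKey is illustrative only and
// mirrors the ComputePipelineCacheKey layout:
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <type_traits>

struct ExampleKey {
    std::uint64_t unique_hash;
    std::uint32_t shared_memory_size;
    std::uint32_t workgroup_size[3];

    std::size_t Hash() const noexcept {
        const auto* bytes = reinterpret_cast<const unsigned char*>(this);
        std::uint64_t hash = 0xcbf29ce484222325ULL; // FNV-1a offset basis
        for (std::size_t i = 0; i < sizeof *this; ++i) {
            hash = (hash ^ bytes[i]) * 0x100000001b3ULL; // FNV-1a prime
        }
        return static_cast<std::size_t>(hash);
    }

    bool operator==(const ExampleKey& rhs) const noexcept {
        return std::memcmp(this, &rhs, sizeof *this) == 0;
    }
};
static_assert(std::is_trivially_copyable_v<ExampleKey>);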
AddBindings<STORAGE_TEXEL_BUFFER>(bindings, binding, flags, entries.storage_texels); - AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images); - return binding; +size_t ComputePipelineCacheKey::Hash() const noexcept { + const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); + return static_cast<size_t>(hash); } -} // Anonymous namespace +bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept { + return std::memcmp(&rhs, this, sizeof *this) == 0; +} -std::size_t GraphicsPipelineCacheKey::Hash() const noexcept { +size_t GraphicsPipelineCacheKey::Hash() const noexcept { const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size()); - return static_cast<std::size_t>(hash); + return static_cast<size_t>(hash); } bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept { return std::memcmp(&rhs, this, Size()) == 0; } -std::size_t ComputePipelineCacheKey::Hash() const noexcept { - const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); - return static_cast<std::size_t>(hash); -} - -bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept { - return std::memcmp(&rhs, this, sizeof *this) == 0; +PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, const Device& device_, + VKScheduler& scheduler_, DescriptorPool& descriptor_pool_, + VKUpdateDescriptorQueue& update_descriptor_queue_, + RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, + TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_) + : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, + device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, + update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, + buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_}, + use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, + workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), + serialization_thread(1, "yuzu:PipelineSerialization") { + const auto& float_control{device.FloatControlProperties()}; + const VkDriverIdKHR driver_id{device.GetDriverID()}; + profile = Shader::Profile{ + .supported_spirv = device.IsKhrSpirv1_4Supported() ? 
0x00010400U : 0x00010000U, + .unified_descriptor_binding = true, + .support_descriptor_aliasing = true, + .support_int8 = true, + .support_int16 = device.IsShaderInt16Supported(), + .support_int64 = device.IsShaderInt64Supported(), + .support_vertex_instance_id = false, + .support_float_controls = true, + .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == + VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, + .support_separate_rounding_mode = + float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, + .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE, + .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, + .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, + .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, + .support_fp16_signed_zero_nan_preserve = + float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE, + .support_fp32_signed_zero_nan_preserve = + float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, + .support_fp64_signed_zero_nan_preserve = + float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, + .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(), + .support_vote = true, + .support_viewport_index_layer_non_geometry = + device.IsExtShaderViewportIndexLayerSupported(), + .support_viewport_mask = device.IsNvViewportArray2Supported(), + .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), + .support_demote_to_helper_invocation = true, + .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), + .support_derivative_control = true, + .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), + + .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), + + .lower_left_origin_mode = false, + .need_declared_frag_colors = false, + + .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, + .has_broken_unsigned_image_offsets = false, + .has_broken_signed_operations = false, + .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR, + .ignore_nan_fp_comparisons = false, + }; + host_info = Shader::HostTranslateInfo{ + .support_float16 = device.IsFloat16Supported(), + .support_int64 = device.IsShaderInt64Supported(), + }; } -Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, ShaderType stage_, - GPUVAddr gpu_addr_, VAddr cpu_addr_, ProgramCode program_code_, u32 main_offset_) - : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage_, engine_), - shader_ir(program_code, main_offset_, compiler_settings, registry), - entries(GenerateShaderEntries(shader_ir)) {} - -Shader::~Shader() = default; - -VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const Device& device_, - VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_) - : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, - kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, - scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ - update_descriptor_queue_} {} - -VKPipelineCache::~VKPipelineCache() = 
default; +PipelineCache::~PipelineCache() = default; -std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { - std::array<Shader*, Maxwell::MaxShaderProgram> shaders{}; - - for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - const auto program{static_cast<Maxwell::ShaderProgram>(index)}; - - // Skip stages that are not enabled - if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { - continue; - } - - const GPUVAddr gpu_addr{GetShaderAddress(maxwell3d, program)}; - const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - ASSERT(cpu_addr); - - Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); - if (!result) { - const u8* const host_ptr{gpu_memory.GetPointer(gpu_addr)}; - - // No shader found - create a new one - static constexpr u32 stage_offset = STAGE_MAIN_OFFSET; - const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1); - ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, false); - const std::size_t size_in_bytes = code.size() * sizeof(u64); - - auto shader = std::make_unique<Shader>(maxwell3d, stage, gpu_addr, *cpu_addr, - std::move(code), stage_offset); - result = shader.get(); +GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { + MICROPROFILE_SCOPE(Vulkan_PipelineCache); - if (cpu_addr) { - Register(std::move(shader), *cpu_addr, size_in_bytes); - } else { - null_shader = std::move(shader); - } + if (!RefreshStages(graphics_key.unique_hashes)) { + current_pipeline = nullptr; + return nullptr; + } + graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported(), + device.IsExtVertexInputDynamicStateSupported()); + + if (current_pipeline) { + GraphicsPipeline* const next{current_pipeline->Next(graphics_key)}; + if (next) { + current_pipeline = next; + return BuiltPipeline(current_pipeline); } - shaders[index] = result; } - return last_shaders = shaders; + return CurrentGraphicsPipelineSlowPath(); } -VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( - const GraphicsPipelineCacheKey& key, u32 num_color_buffers, - VideoCommon::Shader::AsyncShaders& async_shaders) { +ComputePipeline* PipelineCache::CurrentComputePipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); - if (last_graphics_pipeline && last_graphics_key == key) { - return last_graphics_pipeline; - } - last_graphics_key = key; - - if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(gpu)) { - std::unique_lock lock{pipeline_cache}; - const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); - if (is_cache_miss) { - gpu.ShaderNotify().MarkSharderBuilding(); - LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - const auto [program, bindings] = DecompileShaders(key.fixed_state); - async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool, - update_descriptor_queue, bindings, program, key, - num_color_buffers); - } - last_graphics_pipeline = pair->second.get(); - return last_graphics_pipeline; + const ShaderInfo* const shader{ComputeShader()}; + if (!shader) { + return nullptr; } - - const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); - auto& entry = pair->second; - if (is_cache_miss) { - gpu.ShaderNotify().MarkSharderBuilding(); - LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - const auto [program, bindings] = DecompileShaders(key.fixed_state); - entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool, - update_descriptor_queue, key, bindings, - program, num_color_buffers); - 
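// The Shader::Profile populated in the constructor above is driven by
// VkPhysicalDeviceFloatControlsProperties (denorm/rounding independence, denorm
// preserve/flush, signed-zero/inf/nan preserve). A minimal sketch of querying those
// limits with the raw Vulkan API, assuming a device exposing Vulkan 1.2 or
// VK_KHR_shader_float_controls; the project wraps this in its Device class:
#include <vulkan/vulkan.h>

VkPhysicalDeviceFloatControlsProperties QueryFloatControls(VkPhysicalDevice physical_device) {
    VkPhysicalDeviceFloatControlsProperties float_controls{};
    float_controls.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES;

    VkPhysicalDeviceProperties2 properties{};
    properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
    properties.pNext = &float_controls;

    vkGetPhysicalDeviceProperties2(physical_device, &properties);
    // e.g. denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL
    // corresponds to support_separate_denorm_behavior in the profile above.
    return float_controls;
}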
gpu.ShaderNotify().MarkShaderComplete(); + const auto& qmd{kepler_compute.launch_description}; + const ComputePipelineCacheKey key{ + .unique_hash = shader->unique_hash, + .shared_memory_size = qmd.shared_alloc, + .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, + }; + const auto [pair, is_new]{compute_cache.try_emplace(key)}; + auto& pipeline{pair->second}; + if (!is_new) { + return pipeline.get(); } - last_graphics_pipeline = entry.get(); - return last_graphics_pipeline; + pipeline = CreateComputePipeline(key, shader); + return pipeline.get(); } -VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { - MICROPROFILE_SCOPE(Vulkan_PipelineCache); - - const auto [pair, is_cache_miss] = compute_cache.try_emplace(key); - auto& entry = pair->second; - if (!is_cache_miss) { - return *entry; +void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) { + if (title_id == 0) { + return; } - LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - - const GPUVAddr gpu_addr = key.shader; - - const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - ASSERT(cpu_addr); + const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; + const auto base_dir{shader_dir / fmt::format("{:016x}", title_id)}; + if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) { + LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); + return; + } + pipeline_cache_filename = base_dir / "vulkan.bin"; + + struct { + std::mutex mutex; + size_t total{}; + size_t built{}; + bool has_loaded{}; + } state; + + const auto load_compute{[&](std::ifstream& file, FileEnvironment env) { + ComputePipelineCacheKey key; + file.read(reinterpret_cast<char*>(&key), sizeof(key)); + + workers.QueueWork([this, key, env = std::move(env), &state, &callback]() mutable { + ShaderPools pools; + auto pipeline{CreateComputePipeline(pools, key, env, false)}; + std::lock_guard lock{state.mutex}; + if (pipeline) { + compute_cache.emplace(key, std::move(pipeline)); + } + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } + }); + ++state.total; + }}; + const bool extended_dynamic_state = device.IsExtExtendedDynamicStateSupported(); + const bool dynamic_vertex_input = device.IsExtVertexInputDynamicStateSupported(); + const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) { + GraphicsPipelineCacheKey key; + file.read(reinterpret_cast<char*>(&key), sizeof(key)); + + if ((key.state.extended_dynamic_state != 0) != extended_dynamic_state || + (key.state.dynamic_vertex_input != 0) != dynamic_vertex_input) { + return; + } + workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { + ShaderPools pools; + boost::container::static_vector<Shader::Environment*, 5> env_ptrs; + for (auto& env : envs) { + env_ptrs.push_back(&env); + } + auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)}; - Shader* shader = cpu_addr ? 
TryGet(*cpu_addr) : null_kernel.get(); - if (!shader) { - // No shader found - create a new one - const auto host_ptr = gpu_memory.GetPointer(gpu_addr); + std::lock_guard lock{state.mutex}; + graphics_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } + }); + ++state.total; + }}; + VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, CACHE_VERSION, load_compute, + load_graphics); - ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, true); - const std::size_t size_in_bytes = code.size() * sizeof(u64); + std::unique_lock lock{state.mutex}; + callback(VideoCore::LoadCallbackStage::Build, 0, state.total); + state.has_loaded = true; + lock.unlock(); - auto shader_info = std::make_unique<Shader>(kepler_compute, ShaderType::Compute, gpu_addr, - *cpu_addr, std::move(code), KERNEL_MAIN_OFFSET); - shader = shader_info.get(); + workers.WaitForRequests(); +} - if (cpu_addr) { - Register(std::move(shader_info), *cpu_addr, size_in_bytes); - } else { - null_kernel = std::move(shader_info); - } +GraphicsPipeline* PipelineCache::CurrentGraphicsPipelineSlowPath() { + const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; + auto& pipeline{pair->second}; + if (is_new) { + pipeline = CreateGraphicsPipeline(); } - - const Specialization specialization{ - .base_binding = 0, - .workgroup_size = key.workgroup_size, - .shared_memory_size = key.shared_memory_size, - .point_size = std::nullopt, - .enabled_attributes = {}, - .attribute_types = {}, - .ndc_minus_one_to_one = false, - }; - const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute, - shader->GetRegistry(), specialization), - shader->GetEntries()}; - entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool, - update_descriptor_queue, spirv_shader); - return *entry; + if (!pipeline) { + return nullptr; + } + if (current_pipeline) { + current_pipeline->AddTransition(pipeline.get()); + } + current_pipeline = pipeline.get(); + return BuiltPipeline(current_pipeline); } -void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) { - gpu.ShaderNotify().MarkShaderComplete(); - std::unique_lock lock{pipeline_cache}; - graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline); +GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const noexcept { + if (pipeline->IsBuilt()) { + return pipeline; + } + if (!use_asynchronous_shaders) { + return pipeline; + } + // If something is using depth, we can assume that games are not rendering anything which + // will be used one time. + if (maxwell3d.regs.zeta_enable) { + return nullptr; + } + // If games are using a small index count, we can assume these are full screen quads. + // Usually these shaders are only used once for building textures so we can assume they + // can't be built async + if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) { + return pipeline; + } + return nullptr; } -void VKPipelineCache::OnShaderRemoval(Shader* shader) { - bool finished = false; - const auto Finish = [&] { - // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and - // flush. 
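// LoadDiskResources above queues one build job per cached entry on the worker pool and
// reports progress through a mutex-protected counter pair; graphics entries recorded
// with a different extended-dynamic-state or dynamic-vertex-input configuration than
// the current device are skipped rather than rebuilt. A minimal sketch of the progress
// bookkeeping, with illustrative names rather than the project's API:
#include <cstddef>
#include <functional>
#include <mutex>

struct LoadProgress {
    std::mutex mutex;
    std::size_t total{};
    std::size_t built{};
    bool totals_known{}; // set once every cached entry has been queued
};

void OnPipelineBuilt(LoadProgress& progress,
                     const std::function<void(std::size_t, std::size_t)>& callback) {
    std::lock_guard lock{progress.mutex};
    ++progress.built;
    if (progress.totals_known) {
        callback(progress.built, progress.total); // only report once the total is known
    }
}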
- if (finished) { - return; - } - finished = true; - scheduler.Finish(); - }; - - const GPUVAddr invalidated_addr = shader->GetGpuAddr(); - for (auto it = graphics_cache.begin(); it != graphics_cache.end();) { - auto& entry = it->first; - if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) == - entry.shaders.end()) { - ++it; +std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( + ShaderPools& pools, const GraphicsPipelineCacheKey& key, + std::span<Shader::Environment* const> envs, bool build_in_parallel) try { + LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); + size_t env_index{0}; + std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; + const bool uses_vertex_a{key.unique_hashes[0] != 0}; + const bool uses_vertex_b{key.unique_hashes[1] != 0}; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (key.unique_hashes[index] == 0) { continue; } - Finish(); - it = graphics_cache.erase(it); + Shader::Environment& env{*envs[env_index]}; + ++env_index; + + const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))}; + Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); + if (!uses_vertex_a || index != 1) { + // Normal path + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); + } else { + // VertexB path when VertexA is present. + auto& program_va{programs[0]}; + auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; + programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); + } } - for (auto it = compute_cache.begin(); it != compute_cache.end();) { - auto& entry = it->first; - if (entry.shader != invalidated_addr) { - ++it; + std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{}; + std::array<vk::ShaderModule, Maxwell::MaxShaderStage> modules; + + const Shader::IR::Program* previous_stage{}; + Shader::Backend::Bindings binding; + for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; + ++index) { + if (key.unique_hashes[index] == 0) { continue; } - Finish(); - it = compute_cache.erase(it); + UNIMPLEMENTED_IF(index == 0); + + Shader::IR::Program& program{programs[index]}; + const size_t stage_index{index - 1}; + infos[stage_index] = &program.info; + + const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)}; + const std::vector<u32> code{EmitSPIRV(profile, runtime_info, program, binding)}; + device.SaveShader(code); + modules[stage_index] = BuildShader(device, code); + if (device.HasDebuggingToolAttached()) { + const std::string name{fmt::format("Shader {:016x}", key.unique_hashes[index])}; + modules[stage_index].SetObjectNameEXT(name.c_str()); + } + previous_stage = &program; } + Common::ThreadWorker* const thread_worker{build_in_parallel ? 
&workers : nullptr}; + return std::make_unique<GraphicsPipeline>( + maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device, + descriptor_pool, update_descriptor_queue, thread_worker, render_pass_cache, key, + std::move(modules), infos); + +} catch (const Shader::Exception& exception) { + LOG_ERROR(Render_Vulkan, "{}", exception.what()); + return nullptr; } -std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> -VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) { - Specialization specialization; - if (fixed_state.topology == Maxwell::PrimitiveTopology::Points) { - float point_size; - std::memcpy(&point_size, &fixed_state.point_size, sizeof(float)); - specialization.point_size = point_size; - ASSERT(point_size != 0.0f); - } - for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { - const auto& attribute = fixed_state.attributes[i]; - specialization.enabled_attributes[i] = attribute.enabled.Value() != 0; - specialization.attribute_types[i] = attribute.Type(); - } - specialization.ndc_minus_one_to_one = fixed_state.ndc_minus_one_to_one; - specialization.early_fragment_tests = fixed_state.early_z; - - // Alpha test - specialization.alpha_test_func = - FixedPipelineState::UnpackComparisonOp(fixed_state.alpha_test_func.Value()); - specialization.alpha_test_ref = Common::BitCast<float>(fixed_state.alpha_test_ref); - - SPIRVProgram program; - std::vector<VkDescriptorSetLayoutBinding> bindings; +std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() { + GraphicsEnvironments environments; + GetGraphicsEnvironments(environments, graphics_key.unique_hashes); - for (std::size_t index = 1; index < Maxwell::MaxShaderProgram; ++index) { - const auto program_enum = static_cast<Maxwell::ShaderProgram>(index); - // Skip stages that are not enabled - if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { - continue; - } - const GPUVAddr gpu_addr = GetShaderAddress(maxwell3d, program_enum); - const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); - - const std::size_t stage = index == 0 ? 
0 : index - 1; // Stage indices are 0 - 5 - const ShaderType program_type = GetShaderType(program_enum); - const auto& entries = shader->GetEntries(); - program[stage] = { - Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization), - entries, - }; - - const u32 old_binding = specialization.base_binding; - specialization.base_binding = - FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding); - ASSERT(old_binding + entries.NumBindings() == specialization.base_binding); + main_pools.ReleaseContents(); + auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)}; + if (!pipeline || pipeline_cache_filename.empty()) { + return pipeline; } - return {std::move(program), std::move(bindings)}; -} - -template <VkDescriptorType descriptor_type, class Container> -void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u32& binding, - u32& offset, const Container& container) { - static constexpr u32 entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry)); - const u32 count = static_cast<u32>(std::size(container)); - - if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) { - for (u32 i = 0; i < count; ++i) { - const u32 num_samplers = container[i].size; - template_entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = num_samplers, - .descriptorType = descriptor_type, - .offset = offset, - .stride = entry_size, - }); - - ++binding; - offset += num_samplers * entry_size; + serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(environments.envs)] { + boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram> + env_ptrs; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (key.unique_hashes[index] != 0) { + env_ptrs.push_back(&envs[index]); + } } - return; - } + SerializePipeline(key, env_ptrs, pipeline_cache_filename, CACHE_VERSION); + }); + return pipeline; +} - if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER || - descriptor_type == STORAGE_TEXEL_BUFFER) { - // Nvidia has a bug where updating multiple texels at once causes the driver to crash. 
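// Both CreateGraphicsPipeline and CreateComputePipeline hand the words produced by
// EmitSPIRV to BuildShader. A minimal sketch of that step using the raw Vulkan API;
// the project presumably wraps this in its vk::ShaderModule type, so treat this as an
// illustration rather than the actual helper:
#include <cstdint>
#include <vector>
#include <vulkan/vulkan.h>

VkShaderModule CreateShaderModule(VkDevice device, const std::vector<std::uint32_t>& code) {
    const VkShaderModuleCreateInfo ci{
        .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .codeSize = code.size() * sizeof(std::uint32_t), // byte size, not word count
        .pCode = code.data(),
    };
    VkShaderModule shader_module = VK_NULL_HANDLE;
    vkCreateShaderModule(device, &ci, nullptr, &shader_module);
    return shader_module;
}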
- // Note: Fixed in driver Windows 443.24, Linux 440.66.15 - for (u32 i = 0; i < count; ++i) { - template_entries.push_back({ - .dstBinding = binding + i, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = descriptor_type, - .offset = static_cast<std::size_t>(offset + i * entry_size), - .stride = entry_size, - }); - } - } else if (count > 0) { - template_entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = count, - .descriptorType = descriptor_type, - .offset = offset, - .stride = entry_size, - }); +std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( + const ComputePipelineCacheKey& key, const ShaderInfo* shader) { + const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; + const auto& qmd{kepler_compute.launch_description}; + ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; + env.SetCachedSize(shader->size_bytes); + + main_pools.ReleaseContents(); + auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; + if (!pipeline || pipeline_cache_filename.empty()) { + return pipeline; } - offset += count * entry_size; - binding += count; + serialization_thread.QueueWork([this, key, env = std::move(env)] { + SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env}, + pipeline_cache_filename, CACHE_VERSION); + }); + return pipeline; } -void FillDescriptorUpdateTemplateEntries( - const ShaderEntries& entries, u32& binding, u32& offset, - std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) { - AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers); - AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers); - AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.uniform_texels); - AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers); - AddEntry<STORAGE_TEXEL_BUFFER>(template_entries, offset, binding, entries.storage_texels); - AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images); +std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( + ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env, + bool build_in_parallel) try { + LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); + + Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; + auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; + const std::vector<u32> code{EmitSPIRV(profile, program)}; + device.SaveShader(code); + vk::ShaderModule spv_module{BuildShader(device, code)}; + if (device.HasDebuggingToolAttached()) { + const auto name{fmt::format("Shader {:016x}", key.unique_hash)}; + spv_module.SetObjectNameEXT(name.c_str()); + } + Common::ThreadWorker* const thread_worker{build_in_parallel ? 
&workers : nullptr}; + return std::make_unique<ComputePipeline>(device, descriptor_pool, update_descriptor_queue, + thread_worker, &shader_notify, program.info, + std::move(spv_module)); + +} catch (const Shader::Exception& exception) { + LOG_ERROR(Render_Vulkan, "{}", exception.what()); + return nullptr; } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 89d635a3d..efe5a7ed8 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -6,24 +6,28 @@ #include <array> #include <cstddef> +#include <filesystem> +#include <iosfwd> #include <memory> #include <type_traits> #include <unordered_map> #include <utility> #include <vector> -#include <boost/functional/hash.hpp> - #include "common/common_types.h" -#include "video_core/engines/const_buffer_engine_interface.h" +#include "common/thread_worker.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/host_translate_info.h" +#include "shader_recompiler/object_pool.h" +#include "shader_recompiler/profile.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" +#include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" -#include "video_core/shader/async_shaders.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/shader_cache.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -31,23 +35,24 @@ namespace Core { class System; } -namespace Vulkan { +namespace Shader::IR { +struct Program; +} -class Device; -class RasterizerVulkan; -class VKComputePipeline; -class VKDescriptorPool; -class VKScheduler; -class VKUpdateDescriptorQueue; +namespace VideoCore { +class ShaderNotify; +} + +namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct ComputePipelineCacheKey { - GPUVAddr shader; + u64 unique_hash; u32 shared_memory_size; std::array<u32, 3> workgroup_size; - std::size_t Hash() const noexcept; + size_t Hash() const noexcept; bool operator==(const ComputePipelineCacheKey& rhs) const noexcept; @@ -64,15 +69,8 @@ static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>); namespace std { template <> -struct hash<Vulkan::GraphicsPipelineCacheKey> { - std::size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { - return k.Hash(); - } -}; - -template <> struct hash<Vulkan::ComputePipelineCacheKey> { - std::size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept { + size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept { return k.Hash(); } }; @@ -81,94 +79,90 @@ struct hash<Vulkan::ComputePipelineCacheKey> { namespace Vulkan { -class Shader { -public: - explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, - Tegra::Engines::ShaderType stage_, GPUVAddr gpu_addr, VAddr cpu_addr_, - VideoCommon::Shader::ProgramCode program_code, u32 main_offset_); - ~Shader(); - - GPUVAddr GetGpuAddr() const { - return gpu_addr; - } - - VideoCommon::Shader::ShaderIR& GetIR() { - 
return shader_ir; - } - - const VideoCommon::Shader::ShaderIR& GetIR() const { - return shader_ir; - } +class ComputePipeline; +class Device; +class DescriptorPool; +class RasterizerVulkan; +class RenderPassCache; +class VKScheduler; +class VKUpdateDescriptorQueue; - const VideoCommon::Shader::Registry& GetRegistry() const { - return registry; - } +using VideoCommon::ShaderInfo; - const ShaderEntries& GetEntries() const { - return entries; +struct ShaderPools { + void ReleaseContents() { + flow_block.ReleaseContents(); + block.ReleaseContents(); + inst.ReleaseContents(); } -private: - GPUVAddr gpu_addr{}; - VideoCommon::Shader::ProgramCode program_code; - VideoCommon::Shader::Registry registry; - VideoCommon::Shader::ShaderIR shader_ir; - ShaderEntries entries; + Shader::ObjectPool<Shader::IR::Inst> inst; + Shader::ObjectPool<Shader::IR::Block> block; + Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block; }; -class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> { +class PipelineCache : public VideoCommon::ShaderCache { public: - explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, - Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::Engines::KeplerCompute& kepler_compute, - Tegra::MemoryManager& gpu_memory, const Device& device, - VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue); - ~VKPipelineCache() override; + explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d, + Tegra::Engines::KeplerCompute& kepler_compute, + Tegra::MemoryManager& gpu_memory, const Device& device, + VKScheduler& scheduler, DescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue, + RenderPassCache& render_pass_cache, BufferCache& buffer_cache, + TextureCache& texture_cache, VideoCore::ShaderNotify& shader_notify_); + ~PipelineCache(); + + [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); - std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders(); + [[nodiscard]] ComputePipeline* CurrentComputePipeline(); - VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, - u32 num_color_buffers, - VideoCommon::Shader::AsyncShaders& async_shaders); + void LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback); - VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); +private: + [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipelineSlowPath(); - void EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline); + [[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept; -protected: - void OnShaderRemoval(Shader* shader) final; + std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(); -private: - std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders( - const FixedPipelineState& fixed_state); + std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline( + ShaderPools& pools, const GraphicsPipelineCacheKey& key, + std::span<Shader::Environment* const> envs, bool build_in_parallel); - Tegra::GPU& gpu; - Tegra::Engines::Maxwell3D& maxwell3d; - Tegra::Engines::KeplerCompute& kepler_compute; - Tegra::MemoryManager& gpu_memory; + std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineCacheKey& key, + const ShaderInfo* shader); + + std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderPools& pools, + const ComputePipelineCacheKey& key, + Shader::Environment& env, + bool 
build_in_parallel); const Device& device; VKScheduler& scheduler; - VKDescriptorPool& descriptor_pool; + DescriptorPool& descriptor_pool; VKUpdateDescriptorQueue& update_descriptor_queue; + RenderPassCache& render_pass_cache; + BufferCache& buffer_cache; + TextureCache& texture_cache; + VideoCore::ShaderNotify& shader_notify; + bool use_asynchronous_shaders{}; - std::unique_ptr<Shader> null_shader; - std::unique_ptr<Shader> null_kernel; + GraphicsPipelineCacheKey graphics_key{}; + GraphicsPipeline* current_pipeline{}; - std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{}; + std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache; + std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<GraphicsPipeline>> graphics_cache; - GraphicsPipelineCacheKey last_graphics_key; - VKGraphicsPipeline* last_graphics_pipeline = nullptr; + ShaderPools main_pools; - std::mutex pipeline_cache; - std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>> - graphics_cache; - std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache; -}; + Shader::Profile profile; + Shader::HostTranslateInfo host_info; -void FillDescriptorUpdateTemplateEntries( - const ShaderEntries& entries, u32& binding, u32& offset, - std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries); + std::filesystem::path pipeline_cache_filename; + + Common::ThreadWorker workers; + Common::ThreadWorker serialization_thread; +}; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 7cadd5147..c9cb32d71 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -114,14 +114,10 @@ void HostCounter::EndQuery() { } u64 HostCounter::BlockingQuery() const { - if (tick >= cache.GetScheduler().CurrentTick()) { - cache.GetScheduler().Flush(); - } - + cache.GetScheduler().Wait(tick); u64 data; const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults( - query.first, query.second, 1, sizeof(data), &data, sizeof(data), - VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); + query.first, query.second, 1, sizeof(data), &data, sizeof(data), VK_QUERY_RESULT_64_BIT); switch (query_result) { case VK_SUCCESS: diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f57c15b37..c7a07fdd8 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -24,7 +24,6 @@ #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -55,11 +54,10 @@ struct DrawParams { u32 num_instances; u32 base_vertex; u32 num_vertices; + u32 first_index; bool is_indexed; }; -constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute); - VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) { const auto& src = regs.viewport_transform[index]; const float width = src.scale_x * 2.0f; @@ -97,118 +95,6 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index) { return scissor; } 
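// The HostCounter::BlockingQuery change above waits on the scheduler tick first, so the
// query result is already available and VK_QUERY_RESULT_WAIT_BIT can be dropped. A
// minimal sketch of reading a single 64-bit result once the commands that wrote the
// query are known to have finished:
#include <cstdint>
#include <vulkan/vulkan.h>

VkResult ReadQuery64(VkDevice device, VkQueryPool pool, std::uint32_t query,
                     std::uint64_t& out_value) {
    // One query, 64-bit result, no wait flag: without VK_QUERY_RESULT_WAIT_BIT this
    // returns VK_NOT_READY unless the GPU work has completed, which the wait on the
    // scheduler tick guarantees in the code above.
    return vkGetQueryPoolResults(device, pool, query, 1, sizeof(out_value), &out_value,
                                 sizeof(out_value), VK_QUERY_RESULT_64_BIT);
}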
-std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( - const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { - std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses; - for (size_t i = 0; i < std::size(addresses); ++i) { - addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; - } - return addresses; -} - -struct TextureHandle { - constexpr TextureHandle(u32 data, bool via_header_index) { - const Tegra::Texture::TextureHandle handle{data}; - image = handle.tic_id; - sampler = via_header_index ? image : handle.tsc_id.Value(); - } - - u32 image; - u32 sampler; -}; - -template <typename Engine, typename Entry> -TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, - size_t stage, size_t index = 0) { - const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage); - if constexpr (std::is_same_v<Entry, SamplerEntry>) { - if (entry.is_separated) { - const u32 buffer_1 = entry.buffer; - const u32 buffer_2 = entry.secondary_buffer; - const u32 offset_1 = entry.offset; - const u32 offset_2 = entry.secondary_offset; - const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); - const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); - return TextureHandle(handle_1 | handle_2, via_header_index); - } - } - if (entry.is_bindless) { - const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); - return TextureHandle(raw, via_header_index); - } - const u32 buffer = engine.GetBoundBuffer(); - const u64 offset = (entry.offset + index) * sizeof(u32); - return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); -} - -ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { - if (entry.is_buffer) { - return ImageViewType::e2D; - } - switch (entry.type) { - case Tegra::Shader::TextureType::Texture1D: - return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; - case Tegra::Shader::TextureType::Texture2D: - return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; - case Tegra::Shader::TextureType::Texture3D: - return ImageViewType::e3D; - case Tegra::Shader::TextureType::TextureCube: - return entry.is_array ? 
ImageViewType::CubeArray : ImageViewType::Cube; - } - UNREACHABLE(); - return ImageViewType::e2D; -} - -ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { - switch (entry.type) { - case Tegra::Shader::ImageType::Texture1D: - return ImageViewType::e1D; - case Tegra::Shader::ImageType::Texture1DArray: - return ImageViewType::e1DArray; - case Tegra::Shader::ImageType::Texture2D: - return ImageViewType::e2D; - case Tegra::Shader::ImageType::Texture2DArray: - return ImageViewType::e2DArray; - case Tegra::Shader::ImageType::Texture3D: - return ImageViewType::e3D; - case Tegra::Shader::ImageType::TextureBuffer: - return ImageViewType::Buffer; - } - UNREACHABLE(); - return ImageViewType::e2D; -} - -void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_cache, - VKUpdateDescriptorQueue& update_descriptor_queue, - ImageViewId*& image_view_id_ptr, VkSampler*& sampler_ptr) { - for ([[maybe_unused]] const auto& entry : entries.uniform_texels) { - const ImageViewId image_view_id = *image_view_id_ptr++; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); - } - for (const auto& entry : entries.samplers) { - for (size_t i = 0; i < entry.size; ++i) { - const VkSampler sampler = *sampler_ptr++; - const ImageViewId image_view_id = *image_view_id_ptr++; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); - update_descriptor_queue.AddSampledImage(handle, sampler); - } - } - for ([[maybe_unused]] const auto& entry : entries.storage_texels) { - const ImageViewId image_view_id = *image_view_id_ptr++; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); - } - for (const auto& entry : entries.images) { - // TODO: Mark as modified - const ImageViewId image_view_id = *image_view_id_ptr++; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); - update_descriptor_queue.AddImage(handle); - } -} - DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, bool is_indexed) { DrawParams params{ @@ -216,6 +102,7 @@ DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instan .num_instances = is_instanced ? num_instances : 1, .base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first, .num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count, + .first_index = is_indexed ? 
regs.index_array.first : 0, .is_indexed = is_indexed, }; if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) { @@ -243,21 +130,21 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra blit_image(device, scheduler, state_tracker, descriptor_pool), astc_decoder_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue, memory_allocator), - texture_cache_runtime{device, scheduler, memory_allocator, - staging_pool, blit_image, astc_decoder_pass}, + render_pass_cache(device), texture_cache_runtime{device, scheduler, + memory_allocator, staging_pool, + blit_image, astc_decoder_pass, + render_pass_cache}, texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, update_descriptor_queue, descriptor_pool), buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), - pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, - descriptor_pool, update_descriptor_queue), + pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler, + descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache, + texture_cache, gpu.ShaderNotify()), query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), - wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { + wfi_event(device.GetLogical().CreateEvent()) { scheduler.SetQueryCache(query_cache); - if (device.UseAsynchronousShaders()) { - async_shaders.AllocateWorkers(); - } } RasterizerVulkan::~RasterizerVulkan() = default; @@ -270,53 +157,30 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { query_cache.UpdateCounters(); - graphics_key.fixed_state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); - - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - - texture_cache.SynchronizeGraphicsDescriptors(); - texture_cache.UpdateRenderTargets(false); - - const auto shaders = pipeline_cache.GetShaders(); - graphics_key.shaders = GetShaderAddresses(shaders); - - SetupShaderDescriptors(shaders, is_indexed); - - const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); - graphics_key.renderpass = framebuffer->RenderPass(); - - VKGraphicsPipeline* const pipeline = pipeline_cache.GetGraphicsPipeline( - graphics_key, framebuffer->NumColorBuffers(), async_shaders); - if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) { - // Async graphics pipeline was not ready. 
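// DrawParams now carries first_index (regs.index_array.first) so indexed draws start at
// the correct element instead of always 0. A minimal sketch of how those fields map onto
// the Vulkan draw calls recorded in Draw(); the raw API is shown for illustration, while
// the code below records through vk::CommandBuffer:
#include <cstdint>
#include <vulkan/vulkan.h>

struct ExampleDrawParams {
    std::uint32_t base_instance;
    std::uint32_t num_instances;
    std::uint32_t base_vertex;
    std::uint32_t num_vertices;
    std::uint32_t first_index;
    bool is_indexed;
};

void RecordDraw(VkCommandBuffer cmdbuf, const ExampleDrawParams& p) {
    if (p.is_indexed) {
        vkCmdDrawIndexed(cmdbuf, p.num_vertices, p.num_instances, p.first_index,
                         static_cast<std::int32_t>(p.base_vertex), p.base_instance);
    } else {
        vkCmdDraw(cmdbuf, p.num_vertices, p.num_instances, p.base_vertex, p.base_instance);
    }
}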
+ GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()}; + if (!pipeline) { return; } + std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + pipeline->Configure(is_indexed); BeginTransformFeedback(); - scheduler.RequestRenderpass(framebuffer); - scheduler.BindGraphicsPipeline(pipeline->GetHandle()); UpdateDynamicStates(); - const auto& regs = maxwell3d.regs; - const u32 num_instances = maxwell3d.mme_draw.instance_count; - const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed); - const VkPipelineLayout pipeline_layout = pipeline->GetLayout(); - const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet(); - scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { - if (descriptor_set) { - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, - DESCRIPTOR_SET, descriptor_set, nullptr); - } + const auto& regs{maxwell3d.regs}; + const u32 num_instances{maxwell3d.mme_draw.instance_count}; + const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)}; + scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) { if (draw_params.is_indexed) { - cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0, - draw_params.base_vertex, draw_params.base_instance); + cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, + draw_params.first_index, draw_params.base_vertex, + draw_params.base_instance); } else { cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, draw_params.base_vertex, draw_params.base_instance); } }); - EndTransformFeedback(); } @@ -326,6 +190,7 @@ void RasterizerVulkan::Clear() { if (!maxwell3d.ShouldExecute()) { return; } + FlushWork(); query_cache.UpdateCounters(); @@ -395,73 +260,20 @@ void RasterizerVulkan::Clear() { }); } -void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { - MICROPROFILE_SCOPE(Vulkan_Compute); - - query_cache.UpdateCounters(); +void RasterizerVulkan::DispatchCompute() { + FlushWork(); - const auto& launch_desc = kepler_compute.launch_description; - auto& pipeline = pipeline_cache.GetComputePipeline({ - .shader = code_addr, - .shared_memory_size = launch_desc.shared_alloc, - .workgroup_size{ - launch_desc.block_dim_x, - launch_desc.block_dim_y, - launch_desc.block_dim_z, - }, - }); + ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()}; + if (!pipeline) { + return; + } + std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex}; + pipeline->Configure(kepler_compute, gpu_memory, scheduler, buffer_cache, texture_cache); - // Compute dispatches can't be executed inside a renderpass + const auto& qmd{kepler_compute.launch_description}; + const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z}; scheduler.RequestOutsideRenderPassOperationContext(); - - image_view_indices.clear(); - sampler_handles.clear(); - - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - - const auto& entries = pipeline.GetEntries(); - buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); - buffer_cache.UnbindComputeStorageBuffers(); - u32 ssbo_index = 0; - for (const auto& buffer : entries.global_buffers) { - buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, - buffer.is_written); - ++ssbo_index; - } - buffer_cache.UpdateComputeBuffers(); - - texture_cache.SynchronizeComputeDescriptors(); - - SetupComputeUniformTexels(entries); - 
SetupComputeTextures(entries); - SetupComputeStorageTexels(entries); - SetupComputeImages(entries); - - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - texture_cache.FillComputeImageViews(indices_span, image_view_ids); - - update_descriptor_queue.Acquire(); - - buffer_cache.BindHostComputeBuffers(); - - ImageViewId* image_view_id_ptr = image_view_ids.data(); - VkSampler* sampler_ptr = sampler_handles.data(); - PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, - sampler_ptr); - - const VkPipeline pipeline_handle = pipeline.GetHandle(); - const VkPipelineLayout pipeline_layout = pipeline.GetLayout(); - const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet(); - scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, - grid_z = launch_desc.grid_dim_z, pipeline_handle, pipeline_layout, - descriptor_set](vk::CommandBuffer cmdbuf) { - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); - if (descriptor_set) { - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, - DESCRIPTOR_SET, descriptor_set, nullptr); - } - cmdbuf.Dispatch(grid_x, grid_y, grid_z); - }); + scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); }); } void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { @@ -626,6 +438,7 @@ void RasterizerVulkan::WaitForIdle() { void RasterizerVulkan::FragmentBarrier() { // We already put barriers when a render pass finishes + scheduler.RequestOutsideRenderPassOperationContext(); } void RasterizerVulkan::TiledCacheBarrier() { @@ -633,10 +446,11 @@ void RasterizerVulkan::TiledCacheBarrier() { } void RasterizerVulkan::FlushCommands() { - if (draw_counter > 0) { - draw_counter = 0; - scheduler.Flush(); + if (draw_counter == 0) { + return; } + draw_counter = 0; + scheduler.Flush(); } void RasterizerVulkan::TickFrame() { @@ -676,13 +490,18 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, if (!image_view) { return false; } - screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D); + screen_info.image_view = image_view->Handle(Shader::TextureType::Color2D); screen_info.width = image_view->size.width; screen_info.height = image_view->size.height; screen_info.is_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); return true; } +void RasterizerVulkan::LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) { + pipeline_cache.LoadDiskResources(title_id, stop_loading, callback); +} + void RasterizerVulkan::FlushWork() { static constexpr u32 DRAWS_TO_DISPATCH = 4096; @@ -691,13 +510,11 @@ void RasterizerVulkan::FlushWork() { if ((++draw_counter & 7) != 7) { return; } - if (draw_counter < DRAWS_TO_DISPATCH) { // Send recorded tasks to the worker thread scheduler.DispatchWork(); return; } - // Otherwise (every certain number of draws) flush execution. // This submits commands to the Vulkan driver. 
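// FlushWork above batches submission: only every 8th draw does anything, and of those,
// draws below the 4096 threshold just hand recorded work to the worker thread while the
// rest flush the command buffer to the driver. A standalone sketch of that policy; the
// constants and the reset-on-flush behaviour are assumed from the surrounding code:
#include <cstdint>

enum class WorkAction { None, DispatchToWorker, FlushToDriver };

WorkAction NextWorkAction(std::uint32_t& draw_counter) {
    constexpr std::uint32_t draws_to_dispatch = 4096;
    if ((++draw_counter & 7) != 7) {
        return WorkAction::None;
    }
    if (draw_counter < draws_to_dispatch) {
        return WorkAction::DispatchToWorker;
    }
    draw_counter = 0;
    return WorkAction::FlushToDriver;
}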
scheduler.Flush(); @@ -716,52 +533,6 @@ bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 return buffer_cache.DMACopy(src_address, dest_address, amount); } -void RasterizerVulkan::SetupShaderDescriptors( - const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, bool is_indexed) { - image_view_indices.clear(); - sampler_handles.clear(); - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - Shader* const shader = shaders[stage + 1]; - if (!shader) { - continue; - } - const ShaderEntries& entries = shader->GetEntries(); - SetupGraphicsUniformTexels(entries, stage); - SetupGraphicsTextures(entries, stage); - SetupGraphicsStorageTexels(entries, stage); - SetupGraphicsImages(entries, stage); - - buffer_cache.SetEnabledUniformBuffers(stage, entries.enabled_uniform_buffers); - buffer_cache.UnbindGraphicsStorageBuffers(stage); - u32 ssbo_index = 0; - for (const auto& buffer : entries.global_buffers) { - buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, - buffer.cbuf_offset, buffer.is_written); - ++ssbo_index; - } - } - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - buffer_cache.UpdateGraphicsBuffers(is_indexed); - texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); - - buffer_cache.BindHostGeometryBuffers(is_indexed); - - update_descriptor_queue.Acquire(); - - ImageViewId* image_view_id_ptr = image_view_ids.data(); - VkSampler* sampler_ptr = sampler_handles.data(); - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - // Skip VertexA stage - Shader* const shader = shaders[stage + 1]; - if (!shader) { - continue; - } - buffer_cache.BindHostStageBuffers(stage); - PushImageDescriptors(shader->GetEntries(), texture_cache, update_descriptor_queue, - image_view_id_ptr, sampler_ptr); - } -} - void RasterizerVulkan::UpdateDynamicStates() { auto& regs = maxwell3d.regs; UpdateViewportsState(regs); @@ -770,6 +541,7 @@ void RasterizerVulkan::UpdateDynamicStates() { UpdateBlendConstants(regs); UpdateDepthBounds(regs); UpdateStencilFaces(regs); + UpdateLineWidth(regs); if (device.IsExtExtendedDynamicStateSupported()) { UpdateCullMode(regs); UpdateDepthBoundsTestEnable(regs); @@ -779,6 +551,9 @@ void RasterizerVulkan::UpdateDynamicStates() { UpdateFrontFace(regs); UpdateStencilOp(regs); UpdateStencilTestEnable(regs); + if (device.IsExtVertexInputDynamicStateSupported()) { + UpdateVertexInput(regs); + } } } @@ -810,89 +585,6 @@ void RasterizerVulkan::EndTransformFeedback() { [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); } -void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) { - const auto& regs = maxwell3d.regs; - const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : entries.uniform_texels) { - const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) { - const auto& regs = maxwell3d.regs; - const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : entries.samplers) { - for (size_t index = 0; index < entry.size; ++index) { - const TextureHandle handle = - GetTextureInfo(maxwell3d, via_header_index, entry, stage, index); - image_view_indices.push_back(handle.image); - - Sampler* const 
sampler = texture_cache.GetGraphicsSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); - } - } -} - -void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) { - const auto& regs = maxwell3d.regs; - const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : entries.storage_texels) { - const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) { - const auto& regs = maxwell3d.regs; - const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : entries.images) { - const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : entries.uniform_texels) { - const TextureHandle handle = - GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : entries.samplers) { - for (size_t index = 0; index < entry.size; ++index) { - const TextureHandle handle = GetTextureInfo(kepler_compute, via_header_index, entry, - COMPUTE_SHADER_INDEX, index); - image_view_indices.push_back(handle.image); - - Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); - } - } -} - -void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : entries.storage_texels) { - const TextureHandle handle = - GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : entries.images) { - const TextureHandle handle = - GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); - image_view_indices.push_back(handle.image); - } -} - void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchViewports()) { return; @@ -985,6 +677,14 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) } } +void RasterizerVulkan::UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchLineWidth()) { + return; + } + const float width = regs.line_smooth_enable ? 
regs.line_width_smooth : regs.line_width_aliased; + scheduler.Record([width](vk::CommandBuffer cmdbuf) { cmdbuf.SetLineWidth(width); }); +} + void RasterizerVulkan::UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchCullMode()) { return; @@ -999,6 +699,11 @@ void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Re if (!state_tracker.TouchDepthBoundsTestEnable()) { return; } + bool enabled = regs.depth_bounds_enable; + if (enabled && !device.IsDepthBoundsSupported()) { + LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported"); + enabled = false; + } scheduler.Record([enable = regs.depth_bounds_enable](vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthBoundsTestEnableEXT(enable); }); @@ -1086,4 +791,62 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& }); } +void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) { + auto& dirty{maxwell3d.dirty.flags}; + if (!dirty[Dirty::VertexInput]) { + return; + } + dirty[Dirty::VertexInput] = false; + + boost::container::static_vector<VkVertexInputBindingDescription2EXT, 32> bindings; + boost::container::static_vector<VkVertexInputAttributeDescription2EXT, 32> attributes; + + // There seems to be a bug on Nvidia's driver where updating only higher attributes ends up + // generating dirty state. Track the highest dirty attribute and update all attributes until + // that one. + size_t highest_dirty_attr{}; + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + if (dirty[Dirty::VertexAttribute0 + index]) { + highest_dirty_attr = index; + } + } + for (size_t index = 0; index < highest_dirty_attr; ++index) { + const Maxwell::VertexAttribute attribute{regs.vertex_attrib_format[index]}; + const u32 binding{attribute.buffer}; + dirty[Dirty::VertexAttribute0 + index] = false; + dirty[Dirty::VertexBinding0 + static_cast<size_t>(binding)] = true; + if (!attribute.constant) { + attributes.push_back({ + .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT, + .pNext = nullptr, + .location = static_cast<u32>(index), + .binding = binding, + .format = MaxwellToVK::VertexFormat(attribute.type, attribute.size), + .offset = attribute.offset, + }); + } + } + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + if (!dirty[Dirty::VertexBinding0 + index]) { + continue; + } + dirty[Dirty::VertexBinding0 + index] = false; + + const u32 binding{static_cast<u32>(index)}; + const auto& input_binding{regs.vertex_array[binding]}; + const bool is_instanced{regs.instanced_arrays.IsInstancingEnabled(binding)}; + bindings.push_back({ + .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_BINDING_DESCRIPTION_2_EXT, + .pNext = nullptr, + .binding = binding, + .stride = input_binding.stride, + .inputRate = is_instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX, + .divisor = is_instanced ? 
input_binding.divisor : 1, + }); + } + scheduler.Record([bindings, attributes](vk::CommandBuffer cmdbuf) { + cmdbuf.SetVertexInputEXT(bindings, attributes); + }); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 2065209be..866827247 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -21,14 +21,13 @@ #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_fence_manager.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_query_cache.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/shader/async_shaders.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -73,7 +72,7 @@ public: void Draw(bool is_indexed, bool is_instanced) override; void Clear() override; - void DispatchCompute(GPUVAddr code_addr) override; + void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; @@ -102,19 +101,8 @@ public: Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; - - VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { - return async_shaders; - } - - const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { - return async_shaders; - } - - /// Maximum supported size that a constbuffer can have in bytes. - static constexpr size_t MaxConstbufferSize = 0x10000; - static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, - "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); + void LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) override; private: static constexpr size_t MAX_TEXTURES = 192; @@ -125,46 +113,19 @@ private: void FlushWork(); - /// Setup descriptors in the graphics pipeline. - void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, - bool is_indexed); - void UpdateDynamicStates(); void BeginTransformFeedback(); void EndTransformFeedback(); - /// Setup uniform texels in the graphics pipeline. - void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage); - - /// Setup textures in the graphics pipeline. - void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage); - - /// Setup storage texels in the graphics pipeline. - void SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage); - - /// Setup images in the graphics pipeline. - void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); - - /// Setup texel buffers in the compute pipeline. 
- void SetupComputeUniformTexels(const ShaderEntries& entries); - - /// Setup textures in the compute pipeline. - void SetupComputeTextures(const ShaderEntries& entries); - - /// Setup storage texels in the compute pipeline. - void SetupComputeStorageTexels(const ShaderEntries& entries); - - /// Setup images in the compute pipeline. - void SetupComputeImages(const ShaderEntries& entries); - void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); @@ -175,6 +136,8 @@ private: void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs); + Tegra::GPU& gpu; Tegra::MemoryManager& gpu_memory; Tegra::Engines::Maxwell3D& maxwell3d; @@ -187,24 +150,22 @@ private: VKScheduler& scheduler; StagingBufferPool staging_pool; - VKDescriptorPool descriptor_pool; + DescriptorPool descriptor_pool; VKUpdateDescriptorQueue update_descriptor_queue; BlitImageHelper blit_image; ASTCDecoderPass astc_decoder_pass; - - GraphicsPipelineCacheKey graphics_key; + RenderPassCache render_pass_cache; TextureCacheRuntime texture_cache_runtime; TextureCache texture_cache; BufferCacheRuntime buffer_cache_runtime; BufferCache buffer_cache; - VKPipelineCache pipeline_cache; + PipelineCache pipeline_cache; VKQueryCache query_cache; AccelerateDMA accelerate_dma; VKFenceManager fence_manager; vk::Event wfi_event; - VideoCommon::Shader::AsyncShaders async_shaders; boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp new file mode 100644 index 000000000..451ffe019 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -0,0 +1,96 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
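// RenderPassCache::Get builds a single-subpass VkRenderPass (LOAD/STORE attachment ops, GENERAL
// image layouts) and memoizes it by RenderPassKey, so later lookups with the same formats and
// sample count return the cached handle. A minimal usage sketch follows; color_format and
// depth_format are placeholder values, not names taken from this patch:
//
//     RenderPassKey key{};
//     key.color_formats.fill(PixelFormat::Invalid); // unused color targets stay Invalid
//     key.color_formats[0] = color_format;          // PixelFormat of render target 0
//     key.depth_format = depth_format;              // PixelFormat::Invalid when there is no depth buffer
//     key.samples = VK_SAMPLE_COUNT_1_BIT;
//     const VkRenderPass render_pass = render_pass_cache.Get(key);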
+ +#include <unordered_map> + +#include <boost/container/static_vector.hpp> + +#include "video_core/renderer_vulkan/maxwell_to_vk.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" +#include "video_core/surface.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { +namespace { +using VideoCore::Surface::PixelFormat; + +VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format, + VkSampleCountFlagBits samples) { + using MaxwellToVK::SurfaceFormat; + return { + .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, + .format = SurfaceFormat(device, FormatType::Optimal, true, format).format, + .samples = samples, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }; +} +} // Anonymous namespace + +RenderPassCache::RenderPassCache(const Device& device_) : device{&device_} {} + +VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { + std::lock_guard lock{mutex}; + const auto [pair, is_new] = cache.try_emplace(key); + if (!is_new) { + return *pair->second; + } + boost::container::static_vector<VkAttachmentDescription, 9> descriptions; + std::array<VkAttachmentReference, 8> references{}; + u32 num_attachments{}; + u32 num_colors{}; + for (size_t index = 0; index < key.color_formats.size(); ++index) { + const PixelFormat format{key.color_formats[index]}; + const bool is_valid{format != PixelFormat::Invalid}; + references[index] = VkAttachmentReference{ + .attachment = is_valid ? num_colors : VK_ATTACHMENT_UNUSED, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }; + if (is_valid) { + descriptions.push_back(AttachmentDescription(*device, format, key.samples)); + num_attachments = static_cast<u32>(index + 1); + ++num_colors; + } + } + const bool has_depth{key.depth_format != PixelFormat::Invalid}; + VkAttachmentReference depth_reference{}; + if (key.depth_format != PixelFormat::Invalid) { + depth_reference = VkAttachmentReference{ + .attachment = num_colors, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }; + descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples)); + } + const VkSubpassDescription subpass{ + .flags = 0, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .pInputAttachments = nullptr, + .colorAttachmentCount = num_attachments, + .pColorAttachments = references.data(), + .pResolveAttachments = nullptr, + .pDepthStencilAttachment = has_depth ? &depth_reference : nullptr, + .preserveAttachmentCount = 0, + .pPreserveAttachments = nullptr, + }; + pair->second = device->GetLogical().CreateRenderPass({ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .attachmentCount = static_cast<u32>(descriptions.size()), + .pAttachments = descriptions.empty() ? 
nullptr : descriptions.data(), + .subpassCount = 1, + .pSubpasses = &subpass, + .dependencyCount = 0, + .pDependencies = nullptr, + }); + return *pair->second; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.h b/src/video_core/renderer_vulkan/vk_render_pass_cache.h new file mode 100644 index 000000000..eaa0ed775 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.h @@ -0,0 +1,55 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <mutex> +#include <unordered_map> + +#include "video_core/surface.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +struct RenderPassKey { + auto operator<=>(const RenderPassKey&) const noexcept = default; + + std::array<VideoCore::Surface::PixelFormat, 8> color_formats; + VideoCore::Surface::PixelFormat depth_format; + VkSampleCountFlagBits samples; +}; + +} // namespace Vulkan + +namespace std { +template <> +struct hash<Vulkan::RenderPassKey> { + [[nodiscard]] size_t operator()(const Vulkan::RenderPassKey& key) const noexcept { + size_t value = static_cast<size_t>(key.depth_format) << 48; + value ^= static_cast<size_t>(key.samples) << 52; + for (size_t i = 0; i < key.color_formats.size(); ++i) { + value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6); + } + return value; + } +}; +} // namespace std + +namespace Vulkan { + +class Device; + +class RenderPassCache { +public: + explicit RenderPassCache(const Device& device_); + + VkRenderPass Get(const RenderPassKey& key); + +private: + const Device* device{}; + std::unordered_map<RenderPassKey, vk::RenderPass> cache; + std::mutex mutex; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp index a8bf7bda8..2dd514968 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp @@ -10,18 +10,16 @@ namespace Vulkan { ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_) - : master_semaphore{master_semaphore_}, grow_step{grow_step_} {} - -ResourcePool::~ResourcePool() = default; + : master_semaphore{&master_semaphore_}, grow_step{grow_step_} {} size_t ResourcePool::CommitResource() { // Refresh semaphore to query updated results - master_semaphore.Refresh(); - const u64 gpu_tick = master_semaphore.KnownGpuTick(); + master_semaphore->Refresh(); + const u64 gpu_tick = master_semaphore->KnownGpuTick(); const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional<size_t> { for (size_t iterator = begin; iterator < end; ++iterator) { if (gpu_tick >= ticks[iterator]) { - ticks[iterator] = master_semaphore.CurrentTick(); + ticks[iterator] = master_semaphore->CurrentTick(); return iterator; } } @@ -36,7 +34,7 @@ size_t ResourcePool::CommitResource() { // Both searches failed, the pool is full; handle it. 
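// Per the ///< note in vk_resource_pool.h, an overflow grows the pool by grow_step resources;
// ManageOverflow() hands back one of the free slots, which is stamped with the current tick below.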
const size_t free_resource = ManageOverflow(); - ticks[free_resource] = master_semaphore.CurrentTick(); + ticks[free_resource] = master_semaphore->CurrentTick(); found = free_resource; } } diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h index 9d0bb3b4d..f0b80ad59 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.h +++ b/src/video_core/renderer_vulkan/vk_resource_pool.h @@ -18,8 +18,16 @@ class MasterSemaphore; */ class ResourcePool { public: + explicit ResourcePool() = default; explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step); - virtual ~ResourcePool(); + + virtual ~ResourcePool() = default; + + ResourcePool& operator=(ResourcePool&&) noexcept = default; + ResourcePool(ResourcePool&&) noexcept = default; + + ResourcePool& operator=(const ResourcePool&) = default; + ResourcePool(const ResourcePool&) = default; protected: size_t CommitResource(); @@ -34,7 +42,7 @@ private: /// Allocates a new page of resources. void Grow(); - MasterSemaphore& master_semaphore; + MasterSemaphore* master_semaphore{}; size_t grow_step = 0; ///< Number of new resources created after an overflow size_t hint_iterator = 0; ///< Hint to where the next free resources is likely to be found std::vector<u64> ticks; ///< Ticks for each resource diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index f35c120b0..4840962de 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -31,7 +31,7 @@ void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) { command->~Command(); command = next; } - + submit = false; command_offset = 0; first = nullptr; last = nullptr; @@ -42,13 +42,16 @@ VKScheduler::VKScheduler(const Device& device_, StateTracker& state_tracker_) master_semaphore{std::make_unique<MasterSemaphore>(device)}, command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} { AcquireNewChunk(); - AllocateNewContext(); + AllocateWorkerCommandBuffer(); worker_thread = std::thread(&VKScheduler::WorkerThread, this); } VKScheduler::~VKScheduler() { - quit = true; - cv.notify_all(); + { + std::lock_guard lock{work_mutex}; + quit = true; + } + work_cv.notify_all(); worker_thread.join(); } @@ -60,6 +63,7 @@ void VKScheduler::Flush(VkSemaphore semaphore) { void VKScheduler::Finish(VkSemaphore semaphore) { const u64 presubmit_tick = CurrentTick(); SubmitExecution(semaphore); + WaitWorker(); Wait(presubmit_tick); AllocateNewContext(); } @@ -68,20 +72,19 @@ void VKScheduler::WaitWorker() { MICROPROFILE_SCOPE(Vulkan_WaitForWorker); DispatchWork(); - bool finished = false; - do { - cv.notify_all(); - std::unique_lock lock{mutex}; - finished = chunk_queue.Empty(); - } while (!finished); + std::unique_lock lock{work_mutex}; + wait_cv.wait(lock, [this] { return work_queue.empty(); }); } void VKScheduler::DispatchWork() { if (chunk->Empty()) { return; } - chunk_queue.Push(std::move(chunk)); - cv.notify_all(); + { + std::lock_guard lock{work_mutex}; + work_queue.push(std::move(chunk)); + } + work_cv.notify_one(); AcquireNewChunk(); } @@ -124,93 +127,101 @@ void VKScheduler::RequestOutsideRenderPassOperationContext() { EndRenderPass(); } -void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) { +bool VKScheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) { if (state.graphics_pipeline == pipeline) { - return; + return false; } state.graphics_pipeline = pipeline; - 
Record([pipeline](vk::CommandBuffer cmdbuf) { - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - }); + return true; } void VKScheduler::WorkerThread() { - Common::SetCurrentThreadPriority(Common::ThreadPriority::High); - std::unique_lock lock{mutex}; + Common::SetCurrentThreadName("yuzu:VulkanWorker"); do { - cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; }); - if (quit) { - continue; + if (work_queue.empty()) { + wait_cv.notify_all(); + } + std::unique_ptr<CommandChunk> work; + { + std::unique_lock lock{work_mutex}; + work_cv.wait(lock, [this] { return !work_queue.empty() || quit; }); + if (quit) { + continue; + } + work = std::move(work_queue.front()); + work_queue.pop(); + } + const bool has_submit = work->HasSubmit(); + work->ExecuteAll(current_cmdbuf); + if (has_submit) { + AllocateWorkerCommandBuffer(); } - auto extracted_chunk = std::move(chunk_queue.Front()); - chunk_queue.Pop(); - extracted_chunk->ExecuteAll(current_cmdbuf); - chunk_reserve.Push(std::move(extracted_chunk)); + std::lock_guard reserve_lock{reserve_mutex}; + chunk_reserve.push_back(std::move(work)); } while (!quit); } +void VKScheduler::AllocateWorkerCommandBuffer() { + current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader()); + current_cmdbuf.Begin({ + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .pNext = nullptr, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + .pInheritanceInfo = nullptr, + }); +} + void VKScheduler::SubmitExecution(VkSemaphore semaphore) { EndPendingOperations(); InvalidateState(); - WaitWorker(); - std::unique_lock lock{mutex}; + const u64 signal_value = master_semaphore->NextTick(); + Record([semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { + cmdbuf.End(); - current_cmdbuf.End(); + const u32 num_signal_semaphores = semaphore ? 2U : 1U; - const VkSemaphore timeline_semaphore = master_semaphore->Handle(); - const u32 num_signal_semaphores = semaphore ? 
2U : 1U; + const u64 wait_value = signal_value - 1; + const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - const u64 signal_value = master_semaphore->CurrentTick(); - const u64 wait_value = signal_value - 1; - const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + const VkSemaphore timeline_semaphore = master_semaphore->Handle(); + const std::array signal_values{signal_value, u64(0)}; + const std::array signal_semaphores{timeline_semaphore, semaphore}; - master_semaphore->NextTick(); - - const std::array signal_values{signal_value, u64(0)}; - const std::array signal_semaphores{timeline_semaphore, semaphore}; - - const VkTimelineSemaphoreSubmitInfoKHR timeline_si{ - .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR, - .pNext = nullptr, - .waitSemaphoreValueCount = 1, - .pWaitSemaphoreValues = &wait_value, - .signalSemaphoreValueCount = num_signal_semaphores, - .pSignalSemaphoreValues = signal_values.data(), - }; - const VkSubmitInfo submit_info{ - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - .pNext = &timeline_si, - .waitSemaphoreCount = 1, - .pWaitSemaphores = &timeline_semaphore, - .pWaitDstStageMask = &wait_stage_mask, - .commandBufferCount = 1, - .pCommandBuffers = current_cmdbuf.address(), - .signalSemaphoreCount = num_signal_semaphores, - .pSignalSemaphores = signal_semaphores.data(), - }; - switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) { - case VK_SUCCESS: - break; - case VK_ERROR_DEVICE_LOST: - device.ReportLoss(); - [[fallthrough]]; - default: - vk::Check(result); - } + const VkTimelineSemaphoreSubmitInfoKHR timeline_si{ + .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR, + .pNext = nullptr, + .waitSemaphoreValueCount = 1, + .pWaitSemaphoreValues = &wait_value, + .signalSemaphoreValueCount = num_signal_semaphores, + .pSignalSemaphoreValues = signal_values.data(), + }; + const VkSubmitInfo submit_info{ + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .pNext = &timeline_si, + .waitSemaphoreCount = 1, + .pWaitSemaphores = &timeline_semaphore, + .pWaitDstStageMask = &wait_stage_mask, + .commandBufferCount = 1, + .pCommandBuffers = cmdbuf.address(), + .signalSemaphoreCount = num_signal_semaphores, + .pSignalSemaphores = signal_semaphores.data(), + }; + switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) { + case VK_SUCCESS: + break; + case VK_ERROR_DEVICE_LOST: + device.ReportLoss(); + [[fallthrough]]; + default: + vk::Check(result); + } + }); + chunk->MarkSubmit(); + DispatchWork(); } void VKScheduler::AllocateNewContext() { - std::unique_lock lock{mutex}; - - current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader()); - current_cmdbuf.Begin({ - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, - .pNext = nullptr, - .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, - .pInheritanceInfo = nullptr, - }); - // Enable counters once again. These are disabled when a command buffer is finished. 
if (query_cache) { query_cache->UpdateCounters(); @@ -265,12 +276,13 @@ void VKScheduler::EndRenderPass() { } void VKScheduler::AcquireNewChunk() { - if (chunk_reserve.Empty()) { + std::lock_guard lock{reserve_mutex}; + if (chunk_reserve.empty()) { chunk = std::make_unique<CommandChunk>(); return; } - chunk = std::move(chunk_reserve.Front()); - chunk_reserve.Pop(); + chunk = std::move(chunk_reserve.back()); + chunk_reserve.pop_back(); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 3ce48e9d2..cf39a2363 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -8,12 +8,12 @@ #include <condition_variable> #include <cstddef> #include <memory> -#include <stack> #include <thread> #include <utility> +#include <queue> + #include "common/alignment.h" #include "common/common_types.h" -#include "common/threadsafe_queue.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -22,6 +22,7 @@ namespace Vulkan { class CommandPool; class Device; class Framebuffer; +class GraphicsPipeline; class StateTracker; class VKQueryCache; @@ -52,8 +53,8 @@ public: /// of a renderpass. void RequestOutsideRenderPassOperationContext(); - /// Binds a pipeline to the current execution context. - void BindGraphicsPipeline(VkPipeline pipeline); + /// Update the pipeline to the current execution context. + bool UpdateGraphicsPipeline(GraphicsPipeline* pipeline); /// Invalidates current command buffer state except for render passes void InvalidateState(); @@ -85,6 +86,10 @@ public: /// Waits for the given tick to trigger on the GPU. void Wait(u64 tick) { + if (tick >= master_semaphore->CurrentTick()) { + // Make sure we are not waiting for the current tick without signalling + Flush(); + } master_semaphore->Wait(tick); } @@ -154,15 +159,24 @@ private: return true; } + void MarkSubmit() { + submit = true; + } + bool Empty() const { return command_offset == 0; } + bool HasSubmit() const { + return submit; + } + private: Command* first = nullptr; Command* last = nullptr; size_t command_offset = 0; + bool submit = false; alignas(std::max_align_t) std::array<u8, 0x8000> data{}; }; @@ -170,11 +184,13 @@ private: VkRenderPass renderpass = nullptr; VkFramebuffer framebuffer = nullptr; VkExtent2D render_area = {0, 0}; - VkPipeline graphics_pipeline = nullptr; + GraphicsPipeline* graphics_pipeline = nullptr; }; void WorkerThread(); + void AllocateWorkerCommandBuffer(); + void SubmitExecution(VkSemaphore semaphore); void AllocateNewContext(); @@ -204,11 +220,13 @@ private: std::array<VkImage, 9> renderpass_images{}; std::array<VkImageSubresourceRange, 9> renderpass_image_ranges{}; - Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue; - Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve; - std::mutex mutex; - std::condition_variable cv; - bool quit = false; + std::queue<std::unique_ptr<CommandChunk>> work_queue; + std::vector<std::unique_ptr<CommandChunk>> chunk_reserve; + std::mutex reserve_mutex; + std::mutex work_mutex; + std::condition_variable work_cv; + std::condition_variable wait_cv; + std::atomic_bool quit{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp deleted file mode 100644 index c6846d886..000000000 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ /dev/null @@ -1,3166 +0,0 @@ 
-// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <functional> -#include <limits> -#include <map> -#include <optional> -#include <type_traits> -#include <unordered_map> -#include <utility> - -#include <fmt/format.h> - -#include <sirit/sirit.h> - -#include "common/alignment.h" -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/engines/shader_header.h" -#include "video_core/engines/shader_type.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" -#include "video_core/shader/node.h" -#include "video_core/shader/shader_ir.h" -#include "video_core/shader/transform_feedback.h" -#include "video_core/vulkan_common/vulkan_device.h" - -namespace Vulkan { - -namespace { - -using Sirit::Id; -using Tegra::Engines::ShaderType; -using Tegra::Shader::Attribute; -using Tegra::Shader::PixelImap; -using Tegra::Shader::Register; -using namespace VideoCommon::Shader; - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using Operation = const OperationNode&; - -class ASTDecompiler; -class ExprDecompiler; - -// TODO(Rodrigo): Use rasterizer's value -constexpr u32 MaxConstBufferFloats = 0x4000; -constexpr u32 MaxConstBufferElements = MaxConstBufferFloats / 4; - -constexpr u32 NumInputPatches = 32; // This value seems to be the standard - -enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; - -class Expression final { -public: - Expression(Id id_, Type type_) : id{id_}, type{type_} { - ASSERT(type_ != Type::Void); - } - Expression() : type{Type::Void} {} - - Id id{}; - Type type{}; -}; -static_assert(std::is_standard_layout_v<Expression>); - -struct TexelBuffer { - Id image_type{}; - Id image{}; -}; - -struct SampledImage { - Id image_type{}; - Id sampler_type{}; - Id sampler_pointer_type{}; - Id variable{}; -}; - -struct StorageImage { - Id image_type{}; - Id image{}; -}; - -struct AttributeType { - Type type; - Id scalar; - Id vector; -}; - -struct VertexIndices { - std::optional<u32> position; - std::optional<u32> layer; - std::optional<u32> viewport; - std::optional<u32> point_size; - std::optional<u32> clip_distances; -}; - -struct GenericVaryingDescription { - Id id = nullptr; - u32 first_element = 0; - bool is_scalar = false; -}; - -spv::Dim GetSamplerDim(const SamplerEntry& sampler) { - ASSERT(!sampler.is_buffer); - switch (sampler.type) { - case Tegra::Shader::TextureType::Texture1D: - return spv::Dim::Dim1D; - case Tegra::Shader::TextureType::Texture2D: - return spv::Dim::Dim2D; - case Tegra::Shader::TextureType::Texture3D: - return spv::Dim::Dim3D; - case Tegra::Shader::TextureType::TextureCube: - return spv::Dim::Cube; - default: - UNIMPLEMENTED_MSG("Unimplemented sampler type={}", sampler.type); - return spv::Dim::Dim2D; - } -} - -std::pair<spv::Dim, bool> GetImageDim(const ImageEntry& image) { - switch (image.type) { - case Tegra::Shader::ImageType::Texture1D: - return {spv::Dim::Dim1D, false}; - case Tegra::Shader::ImageType::TextureBuffer: - return {spv::Dim::Buffer, false}; - case Tegra::Shader::ImageType::Texture1DArray: - return {spv::Dim::Dim1D, true}; - case Tegra::Shader::ImageType::Texture2D: - return {spv::Dim::Dim2D, false}; - case Tegra::Shader::ImageType::Texture2DArray: - return {spv::Dim::Dim2D, true}; - case Tegra::Shader::ImageType::Texture3D: - return {spv::Dim::Dim3D, false}; - default: - 
UNIMPLEMENTED_MSG("Unimplemented image type={}", image.type); - return {spv::Dim::Dim2D, false}; - } -} - -/// Returns the number of vertices present in a primitive topology. -u32 GetNumPrimitiveTopologyVertices(Maxwell::PrimitiveTopology primitive_topology) { - switch (primitive_topology) { - case Maxwell::PrimitiveTopology::Points: - return 1; - case Maxwell::PrimitiveTopology::Lines: - case Maxwell::PrimitiveTopology::LineLoop: - case Maxwell::PrimitiveTopology::LineStrip: - return 2; - case Maxwell::PrimitiveTopology::Triangles: - case Maxwell::PrimitiveTopology::TriangleStrip: - case Maxwell::PrimitiveTopology::TriangleFan: - return 3; - case Maxwell::PrimitiveTopology::LinesAdjacency: - case Maxwell::PrimitiveTopology::LineStripAdjacency: - return 4; - case Maxwell::PrimitiveTopology::TrianglesAdjacency: - case Maxwell::PrimitiveTopology::TriangleStripAdjacency: - return 6; - case Maxwell::PrimitiveTopology::Quads: - UNIMPLEMENTED_MSG("Quads"); - return 3; - case Maxwell::PrimitiveTopology::QuadStrip: - UNIMPLEMENTED_MSG("QuadStrip"); - return 3; - case Maxwell::PrimitiveTopology::Polygon: - UNIMPLEMENTED_MSG("Polygon"); - return 3; - case Maxwell::PrimitiveTopology::Patches: - UNIMPLEMENTED_MSG("Patches"); - return 3; - default: - UNREACHABLE(); - return 3; - } -} - -spv::ExecutionMode GetExecutionMode(Maxwell::TessellationPrimitive primitive) { - switch (primitive) { - case Maxwell::TessellationPrimitive::Isolines: - return spv::ExecutionMode::Isolines; - case Maxwell::TessellationPrimitive::Triangles: - return spv::ExecutionMode::Triangles; - case Maxwell::TessellationPrimitive::Quads: - return spv::ExecutionMode::Quads; - } - UNREACHABLE(); - return spv::ExecutionMode::Triangles; -} - -spv::ExecutionMode GetExecutionMode(Maxwell::TessellationSpacing spacing) { - switch (spacing) { - case Maxwell::TessellationSpacing::Equal: - return spv::ExecutionMode::SpacingEqual; - case Maxwell::TessellationSpacing::FractionalOdd: - return spv::ExecutionMode::SpacingFractionalOdd; - case Maxwell::TessellationSpacing::FractionalEven: - return spv::ExecutionMode::SpacingFractionalEven; - } - UNREACHABLE(); - return spv::ExecutionMode::SpacingEqual; -} - -spv::ExecutionMode GetExecutionMode(Maxwell::PrimitiveTopology input_topology) { - switch (input_topology) { - case Maxwell::PrimitiveTopology::Points: - return spv::ExecutionMode::InputPoints; - case Maxwell::PrimitiveTopology::Lines: - case Maxwell::PrimitiveTopology::LineLoop: - case Maxwell::PrimitiveTopology::LineStrip: - return spv::ExecutionMode::InputLines; - case Maxwell::PrimitiveTopology::Triangles: - case Maxwell::PrimitiveTopology::TriangleStrip: - case Maxwell::PrimitiveTopology::TriangleFan: - return spv::ExecutionMode::Triangles; - case Maxwell::PrimitiveTopology::LinesAdjacency: - case Maxwell::PrimitiveTopology::LineStripAdjacency: - return spv::ExecutionMode::InputLinesAdjacency; - case Maxwell::PrimitiveTopology::TrianglesAdjacency: - case Maxwell::PrimitiveTopology::TriangleStripAdjacency: - return spv::ExecutionMode::InputTrianglesAdjacency; - case Maxwell::PrimitiveTopology::Quads: - UNIMPLEMENTED_MSG("Quads"); - return spv::ExecutionMode::Triangles; - case Maxwell::PrimitiveTopology::QuadStrip: - UNIMPLEMENTED_MSG("QuadStrip"); - return spv::ExecutionMode::Triangles; - case Maxwell::PrimitiveTopology::Polygon: - UNIMPLEMENTED_MSG("Polygon"); - return spv::ExecutionMode::Triangles; - case Maxwell::PrimitiveTopology::Patches: - UNIMPLEMENTED_MSG("Patches"); - return spv::ExecutionMode::Triangles; - } - UNREACHABLE(); - 
return spv::ExecutionMode::Triangles; -} - -spv::ExecutionMode GetExecutionMode(Tegra::Shader::OutputTopology output_topology) { - switch (output_topology) { - case Tegra::Shader::OutputTopology::PointList: - return spv::ExecutionMode::OutputPoints; - case Tegra::Shader::OutputTopology::LineStrip: - return spv::ExecutionMode::OutputLineStrip; - case Tegra::Shader::OutputTopology::TriangleStrip: - return spv::ExecutionMode::OutputTriangleStrip; - default: - UNREACHABLE(); - return spv::ExecutionMode::OutputPoints; - } -} - -/// Returns true if an attribute index is one of the 32 generic attributes -constexpr bool IsGenericAttribute(Attribute::Index attribute) { - return attribute >= Attribute::Index::Attribute_0 && - attribute <= Attribute::Index::Attribute_31; -} - -/// Returns the location of a generic attribute -u32 GetGenericAttributeLocation(Attribute::Index attribute) { - ASSERT(IsGenericAttribute(attribute)); - return static_cast<u32>(attribute) - static_cast<u32>(Attribute::Index::Attribute_0); -} - -/// Returns true if an object has to be treated as precise -bool IsPrecise(Operation operand) { - const auto& meta{operand.GetMeta()}; - if (std::holds_alternative<MetaArithmetic>(meta)) { - return std::get<MetaArithmetic>(meta).precise; - } - return false; -} - -class SPIRVDecompiler final : public Sirit::Module { -public: - explicit SPIRVDecompiler(const Device& device_, const ShaderIR& ir_, ShaderType stage_, - const Registry& registry_, const Specialization& specialization_) - : Module(0x00010300), device{device_}, ir{ir_}, stage{stage_}, header{ir_.GetHeader()}, - registry{registry_}, specialization{specialization_} { - if (stage_ != ShaderType::Compute) { - transform_feedback = BuildTransformFeedback(registry_.GetGraphicsInfo()); - } - - AddCapability(spv::Capability::Shader); - AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess); - AddCapability(spv::Capability::ImageQuery); - AddCapability(spv::Capability::Image1D); - AddCapability(spv::Capability::ImageBuffer); - AddCapability(spv::Capability::ImageGatherExtended); - AddCapability(spv::Capability::SampledBuffer); - AddCapability(spv::Capability::StorageImageWriteWithoutFormat); - AddCapability(spv::Capability::DrawParameters); - AddCapability(spv::Capability::SubgroupBallotKHR); - AddCapability(spv::Capability::SubgroupVoteKHR); - AddExtension("SPV_KHR_16bit_storage"); - AddExtension("SPV_KHR_shader_ballot"); - AddExtension("SPV_KHR_subgroup_vote"); - AddExtension("SPV_KHR_storage_buffer_storage_class"); - AddExtension("SPV_KHR_variable_pointers"); - AddExtension("SPV_KHR_shader_draw_parameters"); - - if (!transform_feedback.empty()) { - if (device.IsExtTransformFeedbackSupported()) { - AddCapability(spv::Capability::TransformFeedback); - } else { - LOG_ERROR(Render_Vulkan, "Shader requires transform feedbacks but these are not " - "supported on this device"); - } - } - if (ir.UsesLayer() || ir.UsesViewportIndex()) { - if (ir.UsesViewportIndex()) { - AddCapability(spv::Capability::MultiViewport); - } - if (stage != ShaderType::Geometry && device.IsExtShaderViewportIndexLayerSupported()) { - AddExtension("SPV_EXT_shader_viewport_index_layer"); - AddCapability(spv::Capability::ShaderViewportIndexLayerEXT); - } - } - if (device.IsFormatlessImageLoadSupported()) { - AddCapability(spv::Capability::StorageImageReadWithoutFormat); - } - if (device.IsFloat16Supported()) { - AddCapability(spv::Capability::Float16); - } - t_scalar_half = Name(TypeFloat(device_.IsFloat16Supported() ? 
16 : 32), "scalar_half"); - t_half = Name(TypeVector(t_scalar_half, 2), "half"); - - const Id main = Decompile(); - - switch (stage) { - case ShaderType::Vertex: - AddEntryPoint(spv::ExecutionModel::Vertex, main, "main", interfaces); - break; - case ShaderType::TesselationControl: - AddCapability(spv::Capability::Tessellation); - AddEntryPoint(spv::ExecutionModel::TessellationControl, main, "main", interfaces); - AddExecutionMode(main, spv::ExecutionMode::OutputVertices, - header.common2.threads_per_input_primitive); - break; - case ShaderType::TesselationEval: { - const auto& info = registry.GetGraphicsInfo(); - AddCapability(spv::Capability::Tessellation); - AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces); - AddExecutionMode(main, GetExecutionMode(info.tessellation_primitive)); - AddExecutionMode(main, GetExecutionMode(info.tessellation_spacing)); - AddExecutionMode(main, info.tessellation_clockwise - ? spv::ExecutionMode::VertexOrderCw - : spv::ExecutionMode::VertexOrderCcw); - break; - } - case ShaderType::Geometry: { - const auto& info = registry.GetGraphicsInfo(); - AddCapability(spv::Capability::Geometry); - AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces); - AddExecutionMode(main, GetExecutionMode(info.primitive_topology)); - AddExecutionMode(main, GetExecutionMode(header.common3.output_topology)); - AddExecutionMode(main, spv::ExecutionMode::OutputVertices, - header.common4.max_output_vertices); - // TODO(Rodrigo): Where can we get this info from? - AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U); - break; - } - case ShaderType::Fragment: - AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces); - AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); - if (header.ps.omap.depth) { - AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); - } - if (specialization.early_fragment_tests) { - AddExecutionMode(main, spv::ExecutionMode::EarlyFragmentTests); - } - break; - case ShaderType::Compute: - const auto workgroup_size = specialization.workgroup_size; - AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0], - workgroup_size[1], workgroup_size[2]); - AddEntryPoint(spv::ExecutionModel::GLCompute, main, "main", interfaces); - break; - } - } - -private: - Id Decompile() { - DeclareCommon(); - DeclareVertex(); - DeclareTessControl(); - DeclareTessEval(); - DeclareGeometry(); - DeclareFragment(); - DeclareCompute(); - DeclareRegisters(); - DeclareCustomVariables(); - DeclarePredicates(); - DeclareLocalMemory(); - DeclareSharedMemory(); - DeclareInternalFlags(); - DeclareInputAttributes(); - DeclareOutputAttributes(); - - u32 binding = specialization.base_binding; - binding = DeclareConstantBuffers(binding); - binding = DeclareGlobalBuffers(binding); - binding = DeclareUniformTexels(binding); - binding = DeclareSamplers(binding); - binding = DeclareStorageTexels(binding); - binding = DeclareImages(binding); - - const Id main = OpFunction(t_void, {}, TypeFunction(t_void)); - AddLabel(); - - if (ir.IsDecompiled()) { - DeclareFlowVariables(); - DecompileAST(); - } else { - AllocateLabels(); - DecompileBranchMode(); - } - - OpReturn(); - OpFunctionEnd(); - - return main; - } - - void DefinePrologue() { - if (stage == ShaderType::Vertex) { - // Clear Position to avoid reading trash on the Z conversion. 
- const auto position_index = out_indices.position.value(); - const Id position = AccessElement(t_out_float4, out_vertex, position_index); - OpStore(position, v_varying_default); - - if (specialization.point_size) { - const u32 point_size_index = out_indices.point_size.value(); - const Id out_point_size = AccessElement(t_out_float, out_vertex, point_size_index); - OpStore(out_point_size, Constant(t_float, *specialization.point_size)); - } - } - } - - void DecompileAST(); - - void DecompileBranchMode() { - const u32 first_address = ir.GetBasicBlocks().begin()->first; - const Id loop_label = OpLabel("loop"); - const Id merge_label = OpLabel("merge"); - const Id dummy_label = OpLabel(); - const Id jump_label = OpLabel(); - continue_label = OpLabel("continue"); - - std::vector<Sirit::Literal> literals; - std::vector<Id> branch_labels; - for (const auto& [literal, label] : labels) { - literals.push_back(literal); - branch_labels.push_back(label); - } - - jmp_to = OpVariable(TypePointer(spv::StorageClass::Function, t_uint), - spv::StorageClass::Function, Constant(t_uint, first_address)); - AddLocalVariable(jmp_to); - - std::tie(ssy_flow_stack, ssy_flow_stack_top) = CreateFlowStack(); - std::tie(pbk_flow_stack, pbk_flow_stack_top) = CreateFlowStack(); - - Name(jmp_to, "jmp_to"); - Name(ssy_flow_stack, "ssy_flow_stack"); - Name(ssy_flow_stack_top, "ssy_flow_stack_top"); - Name(pbk_flow_stack, "pbk_flow_stack"); - Name(pbk_flow_stack_top, "pbk_flow_stack_top"); - - DefinePrologue(); - - OpBranch(loop_label); - AddLabel(loop_label); - OpLoopMerge(merge_label, continue_label, spv::LoopControlMask::MaskNone); - OpBranch(dummy_label); - - AddLabel(dummy_label); - const Id default_branch = OpLabel(); - const Id jmp_to_load = OpLoad(t_uint, jmp_to); - OpSelectionMerge(jump_label, spv::SelectionControlMask::MaskNone); - OpSwitch(jmp_to_load, default_branch, literals, branch_labels); - - AddLabel(default_branch); - OpReturn(); - - for (const auto& [address, bb] : ir.GetBasicBlocks()) { - AddLabel(labels.at(address)); - - VisitBasicBlock(bb); - - const auto next_it = labels.lower_bound(address + 1); - const Id next_label = next_it != labels.end() ? 
next_it->second : default_branch; - OpBranch(next_label); - } - - AddLabel(jump_label); - OpBranch(continue_label); - AddLabel(continue_label); - OpBranch(loop_label); - AddLabel(merge_label); - } - -private: - friend class ASTDecompiler; - friend class ExprDecompiler; - - static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); - - void AllocateLabels() { - for (const auto& pair : ir.GetBasicBlocks()) { - const u32 address = pair.first; - labels.emplace(address, OpLabel(fmt::format("label_0x{:x}", address))); - } - } - - void DeclareCommon() { - thread_id = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id"); - thread_masks[0] = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupEqMask, t_in_uint4, "thread_eq_mask"); - thread_masks[1] = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupGeMask, t_in_uint4, "thread_ge_mask"); - thread_masks[2] = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupGtMask, t_in_uint4, "thread_gt_mask"); - thread_masks[3] = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupLeMask, t_in_uint4, "thread_le_mask"); - thread_masks[4] = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupLtMask, t_in_uint4, "thread_lt_mask"); - } - - void DeclareVertex() { - if (stage != ShaderType::Vertex) { - return; - } - Id out_vertex_struct; - std::tie(out_vertex_struct, out_indices) = DeclareVertexStruct(); - const Id vertex_ptr = TypePointer(spv::StorageClass::Output, out_vertex_struct); - out_vertex = OpVariable(vertex_ptr, spv::StorageClass::Output); - interfaces.push_back(AddGlobalVariable(Name(out_vertex, "out_vertex"))); - - // Declare input attributes - vertex_index = DeclareInputBuiltIn(spv::BuiltIn::VertexIndex, t_in_int, "vertex_index"); - instance_index = - DeclareInputBuiltIn(spv::BuiltIn::InstanceIndex, t_in_int, "instance_index"); - base_vertex = DeclareInputBuiltIn(spv::BuiltIn::BaseVertex, t_in_int, "base_vertex"); - base_instance = DeclareInputBuiltIn(spv::BuiltIn::BaseInstance, t_in_int, "base_instance"); - } - - void DeclareTessControl() { - if (stage != ShaderType::TesselationControl) { - return; - } - DeclareInputVertexArray(NumInputPatches); - DeclareOutputVertexArray(header.common2.threads_per_input_primitive); - - tess_level_outer = DeclareBuiltIn( - spv::BuiltIn::TessLevelOuter, spv::StorageClass::Output, - TypePointer(spv::StorageClass::Output, TypeArray(t_float, Constant(t_uint, 4U))), - "tess_level_outer"); - Decorate(tess_level_outer, spv::Decoration::Patch); - - tess_level_inner = DeclareBuiltIn( - spv::BuiltIn::TessLevelInner, spv::StorageClass::Output, - TypePointer(spv::StorageClass::Output, TypeArray(t_float, Constant(t_uint, 2U))), - "tess_level_inner"); - Decorate(tess_level_inner, spv::Decoration::Patch); - - invocation_id = DeclareInputBuiltIn(spv::BuiltIn::InvocationId, t_in_int, "invocation_id"); - } - - void DeclareTessEval() { - if (stage != ShaderType::TesselationEval) { - return; - } - DeclareInputVertexArray(NumInputPatches); - DeclareOutputVertex(); - - tess_coord = DeclareInputBuiltIn(spv::BuiltIn::TessCoord, t_in_float3, "tess_coord"); - } - - void DeclareGeometry() { - if (stage != ShaderType::Geometry) { - return; - } - const auto& info = registry.GetGraphicsInfo(); - const u32 num_input = GetNumPrimitiveTopologyVertices(info.primitive_topology); - DeclareInputVertexArray(num_input); - DeclareOutputVertex(); - } - - void DeclareFragment() { - if (stage != ShaderType::Fragment) { - return; - } - - for (u32 rt = 0; rt < static_cast<u32>(std::size(frag_colors)); ++rt) { - if 
(!IsRenderTargetEnabled(rt)) { - continue; - } - const Id id = AddGlobalVariable(OpVariable(t_out_float4, spv::StorageClass::Output)); - Name(id, fmt::format("frag_color{}", rt)); - Decorate(id, spv::Decoration::Location, rt); - - frag_colors[rt] = id; - interfaces.push_back(id); - } - - if (header.ps.omap.depth) { - frag_depth = AddGlobalVariable(OpVariable(t_out_float, spv::StorageClass::Output)); - Name(frag_depth, "frag_depth"); - Decorate(frag_depth, spv::Decoration::BuiltIn, - static_cast<u32>(spv::BuiltIn::FragDepth)); - - interfaces.push_back(frag_depth); - } - - frag_coord = DeclareInputBuiltIn(spv::BuiltIn::FragCoord, t_in_float4, "frag_coord"); - front_facing = DeclareInputBuiltIn(spv::BuiltIn::FrontFacing, t_in_bool, "front_facing"); - point_coord = DeclareInputBuiltIn(spv::BuiltIn::PointCoord, t_in_float2, "point_coord"); - } - - void DeclareCompute() { - if (stage != ShaderType::Compute) { - return; - } - - workgroup_id = DeclareInputBuiltIn(spv::BuiltIn::WorkgroupId, t_in_uint3, "workgroup_id"); - local_invocation_id = - DeclareInputBuiltIn(spv::BuiltIn::LocalInvocationId, t_in_uint3, "local_invocation_id"); - } - - void DeclareRegisters() { - for (const u32 gpr : ir.GetRegisters()) { - const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero); - Name(id, fmt::format("gpr_{}", gpr)); - registers.emplace(gpr, AddGlobalVariable(id)); - } - } - - void DeclareCustomVariables() { - const u32 num_custom_variables = ir.GetNumCustomVariables(); - for (u32 i = 0; i < num_custom_variables; ++i) { - const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero); - Name(id, fmt::format("custom_var_{}", i)); - custom_variables.emplace(i, AddGlobalVariable(id)); - } - } - - void DeclarePredicates() { - for (const auto pred : ir.GetPredicates()) { - const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); - Name(id, fmt::format("pred_{}", static_cast<u32>(pred))); - predicates.emplace(pred, AddGlobalVariable(id)); - } - } - - void DeclareFlowVariables() { - for (u32 i = 0; i < ir.GetASTNumVariables(); i++) { - const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); - Name(id, fmt::format("flow_var_{}", static_cast<u32>(i))); - flow_variables.emplace(i, AddGlobalVariable(id)); - } - } - - void DeclareLocalMemory() { - // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at - // specialization time. - const u64 lmem_size = stage == ShaderType::Compute ? 0x400 : header.GetLocalMemorySize(); - if (lmem_size == 0) { - return; - } - const auto element_count = static_cast<u32>(Common::AlignUp(lmem_size, 4) / 4); - const Id type_array = TypeArray(t_float, Constant(t_uint, element_count)); - const Id type_pointer = TypePointer(spv::StorageClass::Private, type_array); - Name(type_pointer, "LocalMemory"); - - local_memory = - OpVariable(type_pointer, spv::StorageClass::Private, ConstantNull(type_array)); - AddGlobalVariable(Name(local_memory, "local_memory")); - } - - void DeclareSharedMemory() { - if (stage != ShaderType::Compute) { - return; - } - t_smem_uint = TypePointer(spv::StorageClass::Workgroup, t_uint); - - u32 smem_size = specialization.shared_memory_size * 4; - if (smem_size == 0) { - // Avoid declaring an empty array. 
- return; - } - const u32 limit = device.GetMaxComputeSharedMemorySize(); - if (smem_size > limit) { - LOG_ERROR(Render_Vulkan, "Shared memory size {} is clamped to host's limit {}", - smem_size, limit); - smem_size = limit; - } - - const Id type_array = TypeArray(t_uint, Constant(t_uint, smem_size / 4)); - const Id type_pointer = TypePointer(spv::StorageClass::Workgroup, type_array); - Name(type_pointer, "SharedMemory"); - - shared_memory = OpVariable(type_pointer, spv::StorageClass::Workgroup); - AddGlobalVariable(Name(shared_memory, "shared_memory")); - } - - void DeclareInternalFlags() { - static constexpr std::array names{"zero", "sign", "carry", "overflow"}; - - for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) { - const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); - internal_flags[flag] = AddGlobalVariable(Name(id, names[flag])); - } - } - - void DeclareInputVertexArray(u32 length) { - constexpr auto storage = spv::StorageClass::Input; - std::tie(in_indices, in_vertex) = DeclareVertexArray(storage, "in_indices", length); - } - - void DeclareOutputVertexArray(u32 length) { - constexpr auto storage = spv::StorageClass::Output; - std::tie(out_indices, out_vertex) = DeclareVertexArray(storage, "out_indices", length); - } - - std::tuple<VertexIndices, Id> DeclareVertexArray(spv::StorageClass storage_class, - std::string name, u32 length) { - const auto [struct_id, indices] = DeclareVertexStruct(); - const Id vertex_array = TypeArray(struct_id, Constant(t_uint, length)); - const Id vertex_ptr = TypePointer(storage_class, vertex_array); - const Id vertex = OpVariable(vertex_ptr, storage_class); - AddGlobalVariable(Name(vertex, std::move(name))); - interfaces.push_back(vertex); - return {indices, vertex}; - } - - void DeclareOutputVertex() { - Id out_vertex_struct; - std::tie(out_vertex_struct, out_indices) = DeclareVertexStruct(); - const Id out_vertex_ptr = TypePointer(spv::StorageClass::Output, out_vertex_struct); - out_vertex = OpVariable(out_vertex_ptr, spv::StorageClass::Output); - interfaces.push_back(AddGlobalVariable(Name(out_vertex, "out_vertex"))); - } - - void DeclareInputAttributes() { - for (const auto index : ir.GetInputAttributes()) { - if (!IsGenericAttribute(index)) { - continue; - } - const u32 location = GetGenericAttributeLocation(index); - if (!IsAttributeEnabled(location)) { - continue; - } - const auto type_descriptor = GetAttributeType(location); - Id type; - if (IsInputAttributeArray()) { - type = GetTypeVectorDefinitionLut(type_descriptor.type).at(3); - type = TypeArray(type, Constant(t_uint, GetNumInputVertices())); - type = TypePointer(spv::StorageClass::Input, type); - } else { - type = type_descriptor.vector; - } - const Id id = OpVariable(type, spv::StorageClass::Input); - AddGlobalVariable(Name(id, fmt::format("in_attr{}", location))); - input_attributes.emplace(index, id); - interfaces.push_back(id); - - Decorate(id, spv::Decoration::Location, location); - - if (stage != ShaderType::Fragment) { - continue; - } - switch (header.ps.GetPixelImap(location)) { - case PixelImap::Constant: - Decorate(id, spv::Decoration::Flat); - break; - case PixelImap::Perspective: - // Default - break; - case PixelImap::ScreenLinear: - Decorate(id, spv::Decoration::NoPerspective); - break; - default: - UNREACHABLE_MSG("Unused attribute being fetched"); - } - } - } - - void DeclareOutputAttributes() { - if (stage == ShaderType::Compute || stage == ShaderType::Fragment) { - return; - } - - 
UNIMPLEMENTED_IF(registry.GetGraphicsInfo().tfb_enabled && stage != ShaderType::Vertex); - for (const auto index : ir.GetOutputAttributes()) { - if (!IsGenericAttribute(index)) { - continue; - } - DeclareOutputAttribute(index); - } - } - - void DeclareOutputAttribute(Attribute::Index index) { - static constexpr std::string_view swizzle = "xyzw"; - - const u32 location = GetGenericAttributeLocation(index); - u8 element = 0; - while (element < 4) { - const std::size_t remainder = 4 - element; - - std::size_t num_components = remainder; - const std::optional tfb = GetTransformFeedbackInfo(index, element); - if (tfb) { - num_components = tfb->components; - } - - Id type = GetTypeVectorDefinitionLut(Type::Float).at(num_components - 1); - Id varying_default = v_varying_default; - if (IsOutputAttributeArray()) { - const u32 num = GetNumOutputVertices(); - type = TypeArray(type, Constant(t_uint, num)); - if (device.GetDriverID() != VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR) { - // Intel's proprietary driver fails to setup defaults for arrayed output - // attributes. - varying_default = ConstantComposite(type, std::vector(num, varying_default)); - } - } - type = TypePointer(spv::StorageClass::Output, type); - - std::string name = fmt::format("out_attr{}", location); - if (num_components < 4 || element > 0) { - name = fmt::format("{}_{}", name, swizzle.substr(element, num_components)); - } - - const Id id = OpVariable(type, spv::StorageClass::Output, varying_default); - Name(AddGlobalVariable(id), name); - - GenericVaryingDescription description; - description.id = id; - description.first_element = element; - description.is_scalar = num_components == 1; - for (u32 i = 0; i < num_components; ++i) { - const u8 offset = static_cast<u8>(static_cast<u32>(index) * 4 + element + i); - output_attributes.emplace(offset, description); - } - interfaces.push_back(id); - - Decorate(id, spv::Decoration::Location, location); - if (element > 0) { - Decorate(id, spv::Decoration::Component, static_cast<u32>(element)); - } - if (tfb && device.IsExtTransformFeedbackSupported()) { - Decorate(id, spv::Decoration::XfbBuffer, static_cast<u32>(tfb->buffer)); - Decorate(id, spv::Decoration::XfbStride, static_cast<u32>(tfb->stride)); - Decorate(id, spv::Decoration::Offset, static_cast<u32>(tfb->offset)); - } - - element = static_cast<u8>(static_cast<std::size_t>(element) + num_components); - } - } - - std::optional<VaryingTFB> GetTransformFeedbackInfo(Attribute::Index index, u8 element = 0) { - const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); - const auto it = transform_feedback.find(location); - if (it == transform_feedback.end()) { - return {}; - } - return it->second; - } - - u32 DeclareConstantBuffers(u32 binding) { - for (const auto& [index, size] : ir.GetConstantBuffers()) { - const Id type = device.IsKhrUniformBufferStandardLayoutSupported() ? 
t_cbuf_scalar_ubo - : t_cbuf_std140_ubo; - const Id id = OpVariable(type, spv::StorageClass::Uniform); - AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index))); - - Decorate(id, spv::Decoration::Binding, binding++); - Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - constant_buffers.emplace(index, id); - } - return binding; - } - - u32 DeclareGlobalBuffers(u32 binding) { - for (const auto& [base, usage] : ir.GetGlobalMemory()) { - const Id id = OpVariable(t_gmem_ssbo, spv::StorageClass::StorageBuffer); - AddGlobalVariable( - Name(id, fmt::format("gmem_{}_{}", base.cbuf_index, base.cbuf_offset))); - - Decorate(id, spv::Decoration::Binding, binding++); - Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - global_buffers.emplace(base, id); - } - return binding; - } - - u32 DeclareUniformTexels(u32 binding) { - for (const auto& sampler : ir.GetSamplers()) { - if (!sampler.is_buffer) { - continue; - } - ASSERT(!sampler.is_array); - ASSERT(!sampler.is_shadow); - - constexpr auto dim = spv::Dim::Buffer; - constexpr int depth = 0; - constexpr int arrayed = 0; - constexpr bool ms = false; - constexpr int sampled = 1; - constexpr auto format = spv::ImageFormat::Unknown; - const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format); - const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type); - const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); - AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.index))); - Decorate(id, spv::Decoration::Binding, binding++); - Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - - uniform_texels.emplace(sampler.index, TexelBuffer{image_type, id}); - } - return binding; - } - - u32 DeclareSamplers(u32 binding) { - for (const auto& sampler : ir.GetSamplers()) { - if (sampler.is_buffer) { - continue; - } - const auto dim = GetSamplerDim(sampler); - const int depth = sampler.is_shadow ? 1 : 0; - const int arrayed = sampler.is_array ? 1 : 0; - constexpr bool ms = false; - constexpr int sampled = 1; - constexpr auto format = spv::ImageFormat::Unknown; - const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format); - const Id sampler_type = TypeSampledImage(image_type); - const Id sampler_pointer_type = - TypePointer(spv::StorageClass::UniformConstant, sampler_type); - const Id type = sampler.is_indexed - ? 
TypeArray(sampler_type, Constant(t_uint, sampler.size)) - : sampler_type; - const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, type); - const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); - AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.index))); - Decorate(id, spv::Decoration::Binding, binding++); - Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - - sampled_images.emplace( - sampler.index, SampledImage{image_type, sampler_type, sampler_pointer_type, id}); - } - return binding; - } - - u32 DeclareStorageTexels(u32 binding) { - for (const auto& image : ir.GetImages()) { - if (image.type != Tegra::Shader::ImageType::TextureBuffer) { - continue; - } - DeclareImage(image, binding); - } - return binding; - } - - u32 DeclareImages(u32 binding) { - for (const auto& image : ir.GetImages()) { - if (image.type == Tegra::Shader::ImageType::TextureBuffer) { - continue; - } - DeclareImage(image, binding); - } - return binding; - } - - void DeclareImage(const ImageEntry& image, u32& binding) { - const auto [dim, arrayed] = GetImageDim(image); - constexpr int depth = 0; - constexpr bool ms = false; - constexpr int sampled = 2; // This won't be accessed with a sampler - const auto format = image.is_atomic ? spv::ImageFormat::R32ui : spv::ImageFormat::Unknown; - const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {}); - const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type); - const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); - AddGlobalVariable(Name(id, fmt::format("image_{}", image.index))); - - Decorate(id, spv::Decoration::Binding, binding++); - Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - if (image.is_read && !image.is_written) { - Decorate(id, spv::Decoration::NonWritable); - } else if (image.is_written && !image.is_read) { - Decorate(id, spv::Decoration::NonReadable); - } - - images.emplace(image.index, StorageImage{image_type, id}); - } - - bool IsRenderTargetEnabled(u32 rt) const { - for (u32 component = 0; component < 4; ++component) { - if (header.ps.IsColorComponentOutputEnabled(rt, component)) { - return true; - } - } - return false; - } - - bool IsInputAttributeArray() const { - return stage == ShaderType::TesselationControl || stage == ShaderType::TesselationEval || - stage == ShaderType::Geometry; - } - - bool IsOutputAttributeArray() const { - return stage == ShaderType::TesselationControl; - } - - bool IsAttributeEnabled(u32 location) const { - return stage != ShaderType::Vertex || specialization.enabled_attributes[location]; - } - - u32 GetNumInputVertices() const { - switch (stage) { - case ShaderType::Geometry: - return GetNumPrimitiveTopologyVertices(registry.GetGraphicsInfo().primitive_topology); - case ShaderType::TesselationControl: - case ShaderType::TesselationEval: - return NumInputPatches; - default: - UNREACHABLE(); - return 1; - } - } - - u32 GetNumOutputVertices() const { - switch (stage) { - case ShaderType::TesselationControl: - return header.common2.threads_per_input_primitive; - default: - UNREACHABLE(); - return 1; - } - } - - std::tuple<Id, VertexIndices> DeclareVertexStruct() { - struct BuiltIn { - Id type; - spv::BuiltIn builtin; - const char* name; - }; - std::vector<BuiltIn> members; - members.reserve(4); - - const auto AddBuiltIn = [&](Id type, spv::BuiltIn builtin, const char* name) { - const auto index = static_cast<u32>(members.size()); - members.push_back(BuiltIn{type, builtin, name}); 
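// The returned member index is what in_indices/out_indices store; later code uses it with
// AccessElement(..., out_vertex, index, ...) to address the corresponding PerVertex member.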
- return index; - }; - - VertexIndices indices; - indices.position = AddBuiltIn(t_float4, spv::BuiltIn::Position, "position"); - - if (ir.UsesLayer()) { - if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) { - indices.layer = AddBuiltIn(t_int, spv::BuiltIn::Layer, "layer"); - } else { - LOG_ERROR( - Render_Vulkan, - "Shader requires Layer but it's not supported on this stage with this device."); - } - } - - if (ir.UsesViewportIndex()) { - if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) { - indices.viewport = AddBuiltIn(t_int, spv::BuiltIn::ViewportIndex, "viewport_index"); - } else { - LOG_ERROR(Render_Vulkan, "Shader requires ViewportIndex but it's not supported on " - "this stage with this device."); - } - } - - if (ir.UsesPointSize() || specialization.point_size) { - indices.point_size = AddBuiltIn(t_float, spv::BuiltIn::PointSize, "point_size"); - } - - const auto& ir_output_attributes = ir.GetOutputAttributes(); - const bool declare_clip_distances = std::any_of( - ir_output_attributes.begin(), ir_output_attributes.end(), [](const auto& index) { - return index == Attribute::Index::ClipDistances0123 || - index == Attribute::Index::ClipDistances4567; - }); - if (declare_clip_distances) { - indices.clip_distances = AddBuiltIn(TypeArray(t_float, Constant(t_uint, 8)), - spv::BuiltIn::ClipDistance, "clip_distances"); - } - - std::vector<Id> member_types; - member_types.reserve(members.size()); - for (std::size_t i = 0; i < members.size(); ++i) { - member_types.push_back(members[i].type); - } - const Id per_vertex_struct = Name(TypeStruct(member_types), "PerVertex"); - Decorate(per_vertex_struct, spv::Decoration::Block); - - for (std::size_t index = 0; index < members.size(); ++index) { - const auto& member = members[index]; - MemberName(per_vertex_struct, static_cast<u32>(index), member.name); - MemberDecorate(per_vertex_struct, static_cast<u32>(index), spv::Decoration::BuiltIn, - static_cast<u32>(member.builtin)); - } - - return {per_vertex_struct, indices}; - } - - void VisitBasicBlock(const NodeBlock& bb) { - for (const auto& node : bb) { - Visit(node); - } - } - - Expression Visit(const Node& node) { - if (const auto operation = std::get_if<OperationNode>(&*node)) { - if (const auto amend_index = operation->GetAmendIndex()) { - [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type; - ASSERT(type == Type::Void); - } - const auto operation_index = static_cast<std::size_t>(operation->GetCode()); - const auto decompiler = operation_decompilers[operation_index]; - if (decompiler == nullptr) { - UNREACHABLE_MSG("Operation decompiler {} not defined", operation_index); - } - return (this->*decompiler)(*operation); - } - - if (const auto gpr = std::get_if<GprNode>(&*node)) { - const u32 index = gpr->GetIndex(); - if (index == Register::ZeroIndex) { - return {v_float_zero, Type::Float}; - } - return {OpLoad(t_float, registers.at(index)), Type::Float}; - } - - if (const auto cv = std::get_if<CustomVarNode>(&*node)) { - const u32 index = cv->GetIndex(); - return {OpLoad(t_float, custom_variables.at(index)), Type::Float}; - } - - if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { - return {Constant(t_uint, immediate->GetValue()), Type::Uint}; - } - - if (const auto predicate = std::get_if<PredicateNode>(&*node)) { - const auto value = [&]() -> Id { - switch (const auto index = predicate->GetIndex(); index) { - case Tegra::Shader::Pred::UnusedIndex: - return v_true; - case 
Tegra::Shader::Pred::NeverExecute: - return v_false; - default: - return OpLoad(t_bool, predicates.at(index)); - } - }(); - if (predicate->IsNegated()) { - return {OpLogicalNot(t_bool, value), Type::Bool}; - } - return {value, Type::Bool}; - } - - if (const auto abuf = std::get_if<AbufNode>(&*node)) { - const auto attribute = abuf->GetIndex(); - const u32 element = abuf->GetElement(); - const auto& buffer = abuf->GetBuffer(); - - const auto ArrayPass = [&](Id pointer_type, Id composite, std::vector<u32> indices) { - std::vector<Id> members; - members.reserve(std::size(indices) + 1); - - if (buffer && IsInputAttributeArray()) { - members.push_back(AsUint(Visit(buffer))); - } - for (const u32 index : indices) { - members.push_back(Constant(t_uint, index)); - } - return OpAccessChain(pointer_type, composite, members); - }; - - switch (attribute) { - case Attribute::Index::Position: { - if (stage == ShaderType::Fragment) { - return {OpLoad(t_float, AccessElement(t_in_float, frag_coord, element)), - Type::Float}; - } - const std::vector elements = {in_indices.position.value(), element}; - return {OpLoad(t_float, ArrayPass(t_in_float, in_vertex, elements)), Type::Float}; - } - case Attribute::Index::PointCoord: { - switch (element) { - case 0: - case 1: - return {OpCompositeExtract(t_float, OpLoad(t_float2, point_coord), element), - Type::Float}; - } - UNIMPLEMENTED_MSG("Unimplemented point coord element={}", element); - return {v_float_zero, Type::Float}; - } - case Attribute::Index::TessCoordInstanceIDVertexID: - // TODO(Subv): Find out what the values are for the first two elements when inside a - // vertex shader, and what's the value of the fourth element when inside a Tess Eval - // shader. - switch (element) { - case 0: - case 1: - return {OpLoad(t_float, AccessElement(t_in_float, tess_coord, element)), - Type::Float}; - case 2: - return { - OpISub(t_int, OpLoad(t_int, instance_index), OpLoad(t_int, base_instance)), - Type::Int}; - case 3: - return {OpISub(t_int, OpLoad(t_int, vertex_index), OpLoad(t_int, base_vertex)), - Type::Int}; - } - UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); - return {Constant(t_uint, 0U), Type::Uint}; - case Attribute::Index::FrontFacing: - // TODO(Subv): Find out what the values are for the other elements. - ASSERT(stage == ShaderType::Fragment); - if (element == 3) { - const Id is_front_facing = OpLoad(t_bool, front_facing); - const Id true_value = Constant(t_int, static_cast<s32>(-1)); - const Id false_value = Constant(t_int, 0); - return {OpSelect(t_int, is_front_facing, true_value, false_value), Type::Int}; - } - UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); - return {v_float_zero, Type::Float}; - default: - if (!IsGenericAttribute(attribute)) { - break; - } - const u32 location = GetGenericAttributeLocation(attribute); - if (!IsAttributeEnabled(location)) { - // Disabled attributes (also known as constant attributes) always return zero. 
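// This mirrors the IsAttributeEnabled check in DeclareInputAttributes: no OpVariable is ever
// created for a disabled vertex attribute, so a read of it has to be materialized as a
// constant zero here instead of an OpLoad.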
- return {v_float_zero, Type::Float}; - } - const auto type_descriptor = GetAttributeType(location); - const Type type = type_descriptor.type; - const Id attribute_id = input_attributes.at(attribute); - const std::vector elements = {element}; - const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements); - return {OpLoad(GetTypeDefinition(type), pointer), type}; - } - UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute); - return {v_float_zero, Type::Float}; - } - - if (const auto cbuf = std::get_if<CbufNode>(&*node)) { - const Node& offset = cbuf->GetOffset(); - const Id buffer_id = constant_buffers.at(cbuf->GetIndex()); - - Id pointer{}; - if (device.IsKhrUniformBufferStandardLayoutSupported()) { - const Id buffer_offset = - OpShiftRightLogical(t_uint, AsUint(Visit(offset)), Constant(t_uint, 2U)); - pointer = - OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0U), buffer_offset); - } else { - Id buffer_index{}; - Id buffer_element{}; - if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { - // Direct access - const u32 offset_imm = immediate->GetValue(); - ASSERT(offset_imm % 4 == 0); - buffer_index = Constant(t_uint, offset_imm / 16); - buffer_element = Constant(t_uint, (offset_imm / 4) % 4); - } else if (std::holds_alternative<OperationNode>(*offset)) { - // Indirect access - const Id offset_id = AsUint(Visit(offset)); - const Id unsafe_offset = OpUDiv(t_uint, offset_id, Constant(t_uint, 4)); - const Id final_offset = - OpUMod(t_uint, unsafe_offset, Constant(t_uint, MaxConstBufferElements - 1)); - buffer_index = OpUDiv(t_uint, final_offset, Constant(t_uint, 4)); - buffer_element = OpUMod(t_uint, final_offset, Constant(t_uint, 4)); - } else { - UNREACHABLE_MSG("Unmanaged offset node type"); - } - pointer = OpAccessChain(t_cbuf_float, buffer_id, v_uint_zero, buffer_index, - buffer_element); - } - return {OpLoad(t_float, pointer), Type::Float}; - } - - if (const auto gmem = std::get_if<GmemNode>(&*node)) { - return {OpLoad(t_uint, GetGlobalMemoryPointer(*gmem)), Type::Uint}; - } - - if (const auto lmem = std::get_if<LmemNode>(&*node)) { - Id address = AsUint(Visit(lmem->GetAddress())); - address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); - const Id pointer = OpAccessChain(t_prv_float, local_memory, address); - return {OpLoad(t_float, pointer), Type::Float}; - } - - if (const auto smem = std::get_if<SmemNode>(&*node)) { - return {OpLoad(t_uint, GetSharedMemoryPointer(*smem)), Type::Uint}; - } - - if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { - const Id flag = internal_flags.at(static_cast<std::size_t>(internal_flag->GetFlag())); - return {OpLoad(t_bool, flag), Type::Bool}; - } - - if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { - if (const auto amend_index = conditional->GetAmendIndex()) { - [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type; - ASSERT(type == Type::Void); - } - // It's invalid to call conditional on nested nodes, use an operation instead - const Id true_label = OpLabel(); - const Id skip_label = OpLabel(); - const Id condition = AsBool(Visit(conditional->GetCondition())); - OpSelectionMerge(skip_label, spv::SelectionControlMask::MaskNone); - OpBranchConditional(condition, true_label, skip_label); - AddLabel(true_label); - - conditional_branch_set = true; - inside_branch = false; - VisitBasicBlock(conditional->GetCode()); - conditional_branch_set = false; - if (!inside_branch) { - OpBranch(skip_label); - } else { - inside_branch = false; 
- } - AddLabel(skip_label); - return {}; - } - - if (const auto comment = std::get_if<CommentNode>(&*node)) { - if (device.HasDebuggingToolAttached()) { - // We should insert comments with OpString instead of using named variables - Name(OpUndef(t_int), comment->GetText()); - } - return {}; - } - - UNREACHABLE(); - return {}; - } - - template <Id (Module::*func)(Id, Id), Type result_type, Type type_a = result_type> - Expression Unary(Operation operation) { - const Id type_def = GetTypeDefinition(result_type); - const Id op_a = As(Visit(operation[0]), type_a); - - const Id value = (this->*func)(type_def, op_a); - if (IsPrecise(operation)) { - Decorate(value, spv::Decoration::NoContraction); - } - return {value, result_type}; - } - - template <Id (Module::*func)(Id, Id, Id), Type result_type, Type type_a = result_type, - Type type_b = type_a> - Expression Binary(Operation operation) { - const Id type_def = GetTypeDefinition(result_type); - const Id op_a = As(Visit(operation[0]), type_a); - const Id op_b = As(Visit(operation[1]), type_b); - - const Id value = (this->*func)(type_def, op_a, op_b); - if (IsPrecise(operation)) { - Decorate(value, spv::Decoration::NoContraction); - } - return {value, result_type}; - } - - template <Id (Module::*func)(Id, Id, Id, Id), Type result_type, Type type_a = result_type, - Type type_b = type_a, Type type_c = type_b> - Expression Ternary(Operation operation) { - const Id type_def = GetTypeDefinition(result_type); - const Id op_a = As(Visit(operation[0]), type_a); - const Id op_b = As(Visit(operation[1]), type_b); - const Id op_c = As(Visit(operation[2]), type_c); - - const Id value = (this->*func)(type_def, op_a, op_b, op_c); - if (IsPrecise(operation)) { - Decorate(value, spv::Decoration::NoContraction); - } - return {value, result_type}; - } - - template <Id (Module::*func)(Id, Id, Id, Id, Id), Type result_type, Type type_a = result_type, - Type type_b = type_a, Type type_c = type_b, Type type_d = type_c> - Expression Quaternary(Operation operation) { - const Id type_def = GetTypeDefinition(result_type); - const Id op_a = As(Visit(operation[0]), type_a); - const Id op_b = As(Visit(operation[1]), type_b); - const Id op_c = As(Visit(operation[2]), type_c); - const Id op_d = As(Visit(operation[3]), type_d); - - const Id value = (this->*func)(type_def, op_a, op_b, op_c, op_d); - if (IsPrecise(operation)) { - Decorate(value, spv::Decoration::NoContraction); - } - return {value, result_type}; - } - - Expression Assign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - Expression target{}; - if (const auto gpr = std::get_if<GprNode>(&*dest)) { - if (gpr->GetIndex() == Register::ZeroIndex) { - // Writing to Register::ZeroIndex is a no op but we still have to visit its source - // because it might have side effects. 
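// Visiting the source still emits its SPIR-V (for example an operation with memory side
// effects, such as an atomic, presumably ends up here too); only the final OpStore is skipped.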
- Visit(src); - return {}; - } - target = {registers.at(gpr->GetIndex()), Type::Float}; - - } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { - const auto& buffer = abuf->GetBuffer(); - const auto ArrayPass = [&](Id pointer_type, Id composite, std::vector<u32> indices) { - std::vector<Id> members; - members.reserve(std::size(indices) + 1); - - if (buffer && IsOutputAttributeArray()) { - members.push_back(AsUint(Visit(buffer))); - } - for (const u32 index : indices) { - members.push_back(Constant(t_uint, index)); - } - return OpAccessChain(pointer_type, composite, members); - }; - - target = [&]() -> Expression { - const u32 element = abuf->GetElement(); - switch (const auto attribute = abuf->GetIndex(); attribute) { - case Attribute::Index::Position: { - const u32 index = out_indices.position.value(); - return {ArrayPass(t_out_float, out_vertex, {index, element}), Type::Float}; - } - case Attribute::Index::LayerViewportPointSize: - switch (element) { - case 1: { - if (!out_indices.layer) { - return {}; - } - const u32 index = out_indices.layer.value(); - return {AccessElement(t_out_int, out_vertex, index), Type::Int}; - } - case 2: { - if (!out_indices.viewport) { - return {}; - } - const u32 index = out_indices.viewport.value(); - return {AccessElement(t_out_int, out_vertex, index), Type::Int}; - } - case 3: { - const auto index = out_indices.point_size.value(); - return {AccessElement(t_out_float, out_vertex, index), Type::Float}; - } - default: - UNIMPLEMENTED_MSG("LayerViewportPoint element={}", abuf->GetElement()); - return {}; - } - case Attribute::Index::ClipDistances0123: { - const u32 index = out_indices.clip_distances.value(); - return {AccessElement(t_out_float, out_vertex, index, element), Type::Float}; - } - case Attribute::Index::ClipDistances4567: { - const u32 index = out_indices.clip_distances.value(); - return {AccessElement(t_out_float, out_vertex, index, element + 4), - Type::Float}; - } - default: - if (IsGenericAttribute(attribute)) { - const u8 offset = static_cast<u8>(static_cast<u8>(attribute) * 4 + element); - const GenericVaryingDescription description = output_attributes.at(offset); - const Id composite = description.id; - std::vector<u32> indices; - if (!description.is_scalar) { - indices.push_back(element - description.first_element); - } - return {ArrayPass(t_out_float, composite, indices), Type::Float}; - } - UNIMPLEMENTED_MSG("Unhandled output attribute: {}", - static_cast<u32>(attribute)); - return {}; - } - }(); - - } else if (const auto patch = std::get_if<PatchNode>(&*dest)) { - target = [&]() -> Expression { - const u32 offset = patch->GetOffset(); - switch (offset) { - case 0: - case 1: - case 2: - case 3: - return {AccessElement(t_out_float, tess_level_outer, offset % 4), Type::Float}; - case 4: - case 5: - return {AccessElement(t_out_float, tess_level_inner, offset % 4), Type::Float}; - } - UNIMPLEMENTED_MSG("Unhandled patch output offset: {}", offset); - return {}; - }(); - - } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { - Id address = AsUint(Visit(lmem->GetAddress())); - address = OpUDiv(t_uint, address, Constant(t_uint, 4)); - target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float}; - - } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { - target = {GetSharedMemoryPointer(*smem), Type::Uint}; - - } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { - target = {GetGlobalMemoryPointer(*gmem), Type::Uint}; - - } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) { - target = 
{custom_variables.at(cv->GetIndex()), Type::Float}; - - } else { - UNIMPLEMENTED(); - } - - if (!target.id) { - // On failure we return a nullptr target.id, skip these stores. - return {}; - } - - OpStore(target.id, As(Visit(src), target.type)); - return {}; - } - - template <u32 offset> - Expression FCastHalf(Operation operation) { - const Id value = AsHalfFloat(Visit(operation[0])); - return {GetFloatFromHalfScalar(OpCompositeExtract(t_scalar_half, value, offset)), - Type::Float}; - } - - Expression FSwizzleAdd(Operation operation) { - const Id minus = Constant(t_float, -1.0f); - const Id plus = v_float_one; - const Id zero = v_float_zero; - const Id lut_a = ConstantComposite(t_float4, minus, plus, minus, zero); - const Id lut_b = ConstantComposite(t_float4, minus, minus, plus, minus); - - Id mask = OpLoad(t_uint, thread_id); - mask = OpBitwiseAnd(t_uint, mask, Constant(t_uint, 3)); - mask = OpShiftLeftLogical(t_uint, mask, Constant(t_uint, 1)); - mask = OpShiftRightLogical(t_uint, AsUint(Visit(operation[2])), mask); - mask = OpBitwiseAnd(t_uint, mask, Constant(t_uint, 3)); - - const Id modifier_a = OpVectorExtractDynamic(t_float, lut_a, mask); - const Id modifier_b = OpVectorExtractDynamic(t_float, lut_b, mask); - - const Id op_a = OpFMul(t_float, AsFloat(Visit(operation[0])), modifier_a); - const Id op_b = OpFMul(t_float, AsFloat(Visit(operation[1])), modifier_b); - return {OpFAdd(t_float, op_a, op_b), Type::Float}; - } - - Expression HNegate(Operation operation) { - const bool is_f16 = device.IsFloat16Supported(); - const Id minus_one = Constant(t_scalar_half, is_f16 ? 0xbc00 : 0xbf800000); - const Id one = Constant(t_scalar_half, is_f16 ? 0x3c00 : 0x3f800000); - const auto GetNegate = [&](std::size_t index) { - return OpSelect(t_scalar_half, AsBool(Visit(operation[index])), minus_one, one); - }; - const Id negation = OpCompositeConstruct(t_half, GetNegate(1), GetNegate(2)); - return {OpFMul(t_half, AsHalfFloat(Visit(operation[0])), negation), Type::HalfFloat}; - } - - Expression HClamp(Operation operation) { - const auto Pack = [&](std::size_t index) { - const Id scalar = GetHalfScalarFromFloat(AsFloat(Visit(operation[index]))); - return OpCompositeConstruct(t_half, scalar, scalar); - }; - const Id value = AsHalfFloat(Visit(operation[0])); - const Id min = Pack(1); - const Id max = Pack(2); - - const Id clamped = OpFClamp(t_half, value, min, max); - if (IsPrecise(operation)) { - Decorate(clamped, spv::Decoration::NoContraction); - } - return {clamped, Type::HalfFloat}; - } - - Expression HCastFloat(Operation operation) { - const Id value = GetHalfScalarFromFloat(AsFloat(Visit(operation[0]))); - return {OpCompositeConstruct(t_half, value, Constant(t_scalar_half, 0)), Type::HalfFloat}; - } - - Expression HUnpack(Operation operation) { - Expression operand = Visit(operation[0]); - const auto type = std::get<Tegra::Shader::HalfType>(operation.GetMeta()); - if (type == Tegra::Shader::HalfType::H0_H1) { - return operand; - } - const auto value = [&] { - switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) { - case Tegra::Shader::HalfType::F32: - return GetHalfScalarFromFloat(AsFloat(operand)); - case Tegra::Shader::HalfType::H0_H0: - return OpCompositeExtract(t_scalar_half, AsHalfFloat(operand), 0); - case Tegra::Shader::HalfType::H1_H1: - return OpCompositeExtract(t_scalar_half, AsHalfFloat(operand), 1); - default: - UNREACHABLE(); - return ConstantNull(t_half); - } - }(); - return {OpCompositeConstruct(t_half, value, value), Type::HalfFloat}; - } - - Expression 
HMergeF32(Operation operation) { - const Id value = AsHalfFloat(Visit(operation[0])); - return {GetFloatFromHalfScalar(OpCompositeExtract(t_scalar_half, value, 0)), Type::Float}; - } - - template <u32 offset> - Expression HMergeHN(Operation operation) { - const Id target = AsHalfFloat(Visit(operation[0])); - const Id source = AsHalfFloat(Visit(operation[1])); - const Id object = OpCompositeExtract(t_scalar_half, source, offset); - return {OpCompositeInsert(t_half, object, target, offset), Type::HalfFloat}; - } - - Expression HPack2(Operation operation) { - const Id low = GetHalfScalarFromFloat(AsFloat(Visit(operation[0]))); - const Id high = GetHalfScalarFromFloat(AsFloat(Visit(operation[1]))); - return {OpCompositeConstruct(t_half, low, high), Type::HalfFloat}; - } - - Expression LogicalAddCarry(Operation operation) { - const Id op_a = AsUint(Visit(operation[0])); - const Id op_b = AsUint(Visit(operation[1])); - - const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b); - const Id carry = OpCompositeExtract(t_uint, result, 1); - return {OpINotEqual(t_bool, carry, v_uint_zero), Type::Bool}; - } - - Expression LogicalAssign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - Id target{}; - if (const auto pred = std::get_if<PredicateNode>(&*dest)) { - ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); - - const auto index = pred->GetIndex(); - switch (index) { - case Tegra::Shader::Pred::NeverExecute: - case Tegra::Shader::Pred::UnusedIndex: - // Writing to these predicates is a no-op - return {}; - } - target = predicates.at(index); - - } else if (const auto flag = std::get_if<InternalFlagNode>(&*dest)) { - target = internal_flags.at(static_cast<u32>(flag->GetFlag())); - } - - OpStore(target, AsBool(Visit(src))); - return {}; - } - - Expression LogicalFOrdered(Operation operation) { - // Emulate SPIR-V's OpOrdered - const Id op_a = AsFloat(Visit(operation[0])); - const Id op_b = AsFloat(Visit(operation[1])); - const Id is_num_a = OpFOrdEqual(t_bool, op_a, op_a); - const Id is_num_b = OpFOrdEqual(t_bool, op_b, op_b); - return {OpLogicalAnd(t_bool, is_num_a, is_num_b), Type::Bool}; - } - - Expression LogicalFUnordered(Operation operation) { - // Emulate SPIR-V's OpUnordered - const Id op_a = AsFloat(Visit(operation[0])); - const Id op_b = AsFloat(Visit(operation[1])); - const Id is_nan_a = OpIsNan(t_bool, op_a); - const Id is_nan_b = OpIsNan(t_bool, op_b); - return {OpLogicalOr(t_bool, is_nan_a, is_nan_b), Type::Bool}; - } - - Id GetTextureSampler(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - ASSERT(!meta.sampler.is_buffer); - - const auto& entry = sampled_images.at(meta.sampler.index); - Id sampler = entry.variable; - if (meta.sampler.is_indexed) { - const Id index = AsInt(Visit(meta.index)); - sampler = OpAccessChain(entry.sampler_pointer_type, sampler, index); - } - return OpLoad(entry.sampler_type, sampler); - } - - Id GetTextureImage(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - const u32 index = meta.sampler.index; - if (meta.sampler.is_buffer) { - const auto& entry = uniform_texels.at(index); - return OpLoad(entry.image_type, entry.image); - } else { - const auto& entry = sampled_images.at(index); - return OpImage(entry.image_type, GetTextureSampler(operation)); - } - } - - Id GetImage(Operation operation) { - const auto& meta = std::get<MetaImage>(operation.GetMeta()); - const auto entry = images.at(meta.image.index); - return 
OpLoad(entry.image_type, entry.image); - } - - Id AssembleVector(const std::vector<Id>& coords, Type type) { - const Id coords_type = GetTypeVectorDefinitionLut(type).at(coords.size() - 1); - return coords.size() == 1 ? coords[0] : OpCompositeConstruct(coords_type, coords); - } - - Id GetCoordinates(Operation operation, Type type) { - std::vector<Id> coords; - for (std::size_t i = 0; i < operation.GetOperandsCount(); ++i) { - coords.push_back(As(Visit(operation[i]), type)); - } - if (const auto meta = std::get_if<MetaTexture>(&operation.GetMeta())) { - // Add array coordinate for textures - if (meta->sampler.is_array) { - Id array = AsInt(Visit(meta->array)); - if (type == Type::Float) { - array = OpConvertSToF(t_float, array); - } - coords.push_back(array); - } - } - return AssembleVector(coords, type); - } - - Id GetOffsetCoordinates(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - std::vector<Id> coords; - coords.reserve(meta.aoffi.size()); - for (const auto& coord : meta.aoffi) { - coords.push_back(AsInt(Visit(coord))); - } - return AssembleVector(coords, Type::Int); - } - - std::pair<Id, Id> GetDerivatives(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - const auto& derivatives = meta.derivates; - ASSERT(derivatives.size() % 2 == 0); - - const std::size_t components = derivatives.size() / 2; - std::vector<Id> dx, dy; - dx.reserve(components); - dy.reserve(components); - for (std::size_t index = 0; index < components; ++index) { - dx.push_back(AsFloat(Visit(derivatives.at(index * 2 + 0)))); - dy.push_back(AsFloat(Visit(derivatives.at(index * 2 + 1)))); - } - return {AssembleVector(dx, Type::Float), AssembleVector(dy, Type::Float)}; - } - - Expression GetTextureElement(Operation operation, Id sample_value, Type type) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - const auto type_def = GetTypeDefinition(type); - return {OpCompositeExtract(type_def, sample_value, meta.element), type}; - } - - Expression Texture(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - - const bool can_implicit = stage == ShaderType::Fragment; - const Id sampler = GetTextureSampler(operation); - const Id coords = GetCoordinates(operation, Type::Float); - - std::vector<Id> operands; - spv::ImageOperandsMask mask{}; - if (meta.bias) { - mask = mask | spv::ImageOperandsMask::Bias; - operands.push_back(AsFloat(Visit(meta.bias))); - } - - if (!can_implicit) { - mask = mask | spv::ImageOperandsMask::Lod; - operands.push_back(v_float_zero); - } - - if (!meta.aoffi.empty()) { - mask = mask | spv::ImageOperandsMask::Offset; - operands.push_back(GetOffsetCoordinates(operation)); - } - - if (meta.depth_compare) { - // Depth sampling - UNIMPLEMENTED_IF(meta.bias); - const Id dref = AsFloat(Visit(meta.depth_compare)); - if (can_implicit) { - return { - OpImageSampleDrefImplicitLod(t_float, sampler, coords, dref, mask, operands), - Type::Float}; - } else { - return { - OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands), - Type::Float}; - } - } - - Id texture; - if (can_implicit) { - texture = OpImageSampleImplicitLod(t_float4, sampler, coords, mask, operands); - } else { - texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands); - } - return GetTextureElement(operation, texture, Type::Float); - } - - Expression TextureLod(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - - const Id sampler = 
GetTextureSampler(operation); - const Id coords = GetCoordinates(operation, Type::Float); - const Id lod = AsFloat(Visit(meta.lod)); - - spv::ImageOperandsMask mask = spv::ImageOperandsMask::Lod; - std::vector<Id> operands{lod}; - - if (!meta.aoffi.empty()) { - mask = mask | spv::ImageOperandsMask::Offset; - operands.push_back(GetOffsetCoordinates(operation)); - } - - if (meta.sampler.is_shadow) { - const Id dref = AsFloat(Visit(meta.depth_compare)); - return {OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands), - Type::Float}; - } - const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands); - return GetTextureElement(operation, texture, Type::Float); - } - - Expression TextureGather(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - - const Id coords = GetCoordinates(operation, Type::Float); - - spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone; - std::vector<Id> operands; - Id texture{}; - - if (!meta.aoffi.empty()) { - mask = mask | spv::ImageOperandsMask::Offset; - operands.push_back(GetOffsetCoordinates(operation)); - } - - if (meta.sampler.is_shadow) { - texture = OpImageDrefGather(t_float4, GetTextureSampler(operation), coords, - AsFloat(Visit(meta.depth_compare)), mask, operands); - } else { - u32 component_value = 0; - if (meta.component) { - const auto component = std::get_if<ImmediateNode>(&*meta.component); - ASSERT_MSG(component, "Component is not an immediate value"); - component_value = component->GetValue(); - } - texture = OpImageGather(t_float4, GetTextureSampler(operation), coords, - Constant(t_uint, component_value), mask, operands); - } - return GetTextureElement(operation, texture, Type::Float); - } - - Expression TextureQueryDimensions(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - UNIMPLEMENTED_IF(!meta.aoffi.empty()); - UNIMPLEMENTED_IF(meta.depth_compare); - - const auto image_id = GetTextureImage(operation); - if (meta.element == 3) { - return {OpImageQueryLevels(t_int, image_id), Type::Int}; - } - - const Id lod = AsUint(Visit(operation[0])); - const std::size_t coords_count = [&meta] { - switch (const auto type = meta.sampler.type) { - case Tegra::Shader::TextureType::Texture1D: - return 1; - case Tegra::Shader::TextureType::Texture2D: - case Tegra::Shader::TextureType::TextureCube: - return 2; - case Tegra::Shader::TextureType::Texture3D: - return 3; - default: - UNREACHABLE_MSG("Invalid texture type={}", type); - return 2; - } - }(); - - if (meta.element >= coords_count) { - return {v_float_zero, Type::Float}; - } - - const std::array<Id, 3> types = {t_int, t_int2, t_int3}; - const Id sizes = OpImageQuerySizeLod(types.at(coords_count - 1), image_id, lod); - const Id size = OpCompositeExtract(t_int, sizes, meta.element); - return {size, Type::Int}; - } - - Expression TextureQueryLod(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - UNIMPLEMENTED_IF(!meta.aoffi.empty()); - UNIMPLEMENTED_IF(meta.depth_compare); - - if (meta.element >= 2) { - UNREACHABLE_MSG("Invalid element"); - return {v_float_zero, Type::Float}; - } - const auto sampler_id = GetTextureSampler(operation); - - const Id multiplier = Constant(t_float, 256.0f); - const Id multipliers = ConstantComposite(t_float2, multiplier, multiplier); - - const Id coords = GetCoordinates(operation, Type::Float); - Id size = OpImageQueryLod(t_float2, sampler_id, coords); - size = OpFMul(t_float2, size, multipliers); - size = 
OpConvertFToS(t_int2, size); - return GetTextureElement(operation, size, Type::Int); - } - - Expression TexelFetch(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - UNIMPLEMENTED_IF(meta.depth_compare); - - const Id image = GetTextureImage(operation); - const Id coords = GetCoordinates(operation, Type::Int); - - spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone; - std::vector<Id> operands; - Id fetch; - - if (meta.lod && !meta.sampler.is_buffer) { - mask = mask | spv::ImageOperandsMask::Lod; - operands.push_back(AsInt(Visit(meta.lod))); - } - - if (!meta.aoffi.empty()) { - mask = mask | spv::ImageOperandsMask::Offset; - operands.push_back(GetOffsetCoordinates(operation)); - } - - fetch = OpImageFetch(t_float4, image, coords, mask, operands); - return GetTextureElement(operation, fetch, Type::Float); - } - - Expression TextureGradient(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - UNIMPLEMENTED_IF(!meta.aoffi.empty()); - - const Id sampler = GetTextureSampler(operation); - const Id coords = GetCoordinates(operation, Type::Float); - const auto [dx, dy] = GetDerivatives(operation); - const std::vector grad = {dx, dy}; - - static constexpr auto mask = spv::ImageOperandsMask::Grad; - const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, grad); - return GetTextureElement(operation, texture, Type::Float); - } - - Expression ImageLoad(Operation operation) { - if (!device.IsFormatlessImageLoadSupported()) { - return {v_float_zero, Type::Float}; - } - - const auto& meta{std::get<MetaImage>(operation.GetMeta())}; - - const Id coords = GetCoordinates(operation, Type::Int); - const Id texel = OpImageRead(t_uint4, GetImage(operation), coords); - - return {OpCompositeExtract(t_uint, texel, meta.element), Type::Uint}; - } - - Expression ImageStore(Operation operation) { - const auto meta{std::get<MetaImage>(operation.GetMeta())}; - std::vector<Id> colors; - for (const auto& value : meta.values) { - colors.push_back(AsUint(Visit(value))); - } - - const Id coords = GetCoordinates(operation, Type::Int); - const Id texel = OpCompositeConstruct(t_uint4, colors); - - OpImageWrite(GetImage(operation), coords, texel, {}); - return {}; - } - - template <Id (Module::*func)(Id, Id, Id, Id, Id)> - Expression AtomicImage(Operation operation) { - const auto& meta{std::get<MetaImage>(operation.GetMeta())}; - ASSERT(meta.values.size() == 1); - - const Id coordinate = GetCoordinates(operation, Type::Int); - const Id image = images.at(meta.image.index).image; - const Id sample = v_uint_zero; - const Id pointer = OpImageTexelPointer(t_image_uint, image, coordinate, sample); - - const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); - const Id semantics = v_uint_zero; - const Id value = AsUint(Visit(meta.values[0])); - return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint}; - } - - template <Id (Module::*func)(Id, Id, Id, Id, Id)> - Expression Atomic(Operation operation) { - Id pointer; - if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { - pointer = GetSharedMemoryPointer(*smem); - } else if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { - pointer = GetGlobalMemoryPointer(*gmem); - } else { - UNREACHABLE(); - return {v_float_zero, Type::Float}; - } - const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); - const Id semantics = v_uint_zero; - const Id value = AsUint(Visit(operation[1])); - - return {(this->*func)(t_uint, 
pointer, scope, semantics, value), Type::Uint}; - } - - template <Id (Module::*func)(Id, Id, Id, Id, Id)> - Expression Reduce(Operation operation) { - Atomic<func>(operation); - return {}; - } - - Expression Branch(Operation operation) { - const auto& target = std::get<ImmediateNode>(*operation[0]); - OpStore(jmp_to, Constant(t_uint, target.GetValue())); - OpBranch(continue_label); - inside_branch = true; - if (!conditional_branch_set) { - AddLabel(); - } - return {}; - } - - Expression BranchIndirect(Operation operation) { - const Id op_a = AsUint(Visit(operation[0])); - - OpStore(jmp_to, op_a); - OpBranch(continue_label); - inside_branch = true; - if (!conditional_branch_set) { - AddLabel(); - } - return {}; - } - - Expression PushFlowStack(Operation operation) { - const auto& target = std::get<ImmediateNode>(*operation[0]); - const auto [flow_stack, flow_stack_top] = GetFlowStack(operation); - const Id current = OpLoad(t_uint, flow_stack_top); - const Id next = OpIAdd(t_uint, current, Constant(t_uint, 1)); - const Id access = OpAccessChain(t_func_uint, flow_stack, current); - - OpStore(access, Constant(t_uint, target.GetValue())); - OpStore(flow_stack_top, next); - return {}; - } - - Expression PopFlowStack(Operation operation) { - const auto [flow_stack, flow_stack_top] = GetFlowStack(operation); - const Id current = OpLoad(t_uint, flow_stack_top); - const Id previous = OpISub(t_uint, current, Constant(t_uint, 1)); - const Id access = OpAccessChain(t_func_uint, flow_stack, previous); - const Id target = OpLoad(t_uint, access); - - OpStore(flow_stack_top, previous); - OpStore(jmp_to, target); - OpBranch(continue_label); - inside_branch = true; - if (!conditional_branch_set) { - AddLabel(); - } - return {}; - } - - Id MaxwellToSpirvComparison(Maxwell::ComparisonOp compare_op, Id operand_1, Id operand_2) { - using Compare = Maxwell::ComparisonOp; - switch (compare_op) { - case Compare::NeverOld: - return v_false; // Never let the test pass - case Compare::LessOld: - return OpFOrdLessThan(t_bool, operand_1, operand_2); - case Compare::EqualOld: - return OpFOrdEqual(t_bool, operand_1, operand_2); - case Compare::LessEqualOld: - return OpFOrdLessThanEqual(t_bool, operand_1, operand_2); - case Compare::GreaterOld: - return OpFOrdGreaterThan(t_bool, operand_1, operand_2); - case Compare::NotEqualOld: - return OpFOrdNotEqual(t_bool, operand_1, operand_2); - case Compare::GreaterEqualOld: - return OpFOrdGreaterThanEqual(t_bool, operand_1, operand_2); - default: - UNREACHABLE(); - return v_true; - } - } - - void AlphaTest(Id pointer) { - if (specialization.alpha_test_func == Maxwell::ComparisonOp::AlwaysOld) { - return; - } - const Id true_label = OpLabel(); - const Id discard_label = OpLabel(); - const Id alpha_reference = Constant(t_float, specialization.alpha_test_ref); - const Id alpha_value = OpLoad(t_float, pointer); - const Id condition = - MaxwellToSpirvComparison(specialization.alpha_test_func, alpha_value, alpha_reference); - - OpBranchConditional(condition, true_label, discard_label); - AddLabel(discard_label); - OpKill(); - AddLabel(true_label); - } - - void PreExit() { - if (stage == ShaderType::Vertex && specialization.ndc_minus_one_to_one) { - const u32 position_index = out_indices.position.value(); - const Id z_pointer = AccessElement(t_out_float, out_vertex, position_index, 2U); - const Id w_pointer = AccessElement(t_out_float, out_vertex, position_index, 3U); - Id depth = OpLoad(t_float, z_pointer); - depth = OpFAdd(t_float, depth, OpLoad(t_float, w_pointer)); - depth = 
OpFMul(t_float, depth, Constant(t_float, 0.5f)); - OpStore(z_pointer, depth); - } - if (stage == ShaderType::Fragment) { - const auto SafeGetRegister = [this](u32 reg) { - if (const auto it = registers.find(reg); it != registers.end()) { - return OpLoad(t_float, it->second); - } - return v_float_zero; - }; - - UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, - "Sample mask write is unimplemented"); - - // Write the color outputs using the data in the shader registers, disabled - // rendertargets/components are skipped in the register assignment. - u32 current_reg = 0; - for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { - // TODO(Subv): Figure out how dual-source blending is configured in the Switch. - for (u32 component = 0; component < 4; ++component) { - if (!header.ps.IsColorComponentOutputEnabled(rt, component)) { - continue; - } - const Id pointer = AccessElement(t_out_float, frag_colors[rt], component); - OpStore(pointer, SafeGetRegister(current_reg)); - if (rt == 0 && component == 3) { - AlphaTest(pointer); - } - ++current_reg; - } - } - if (header.ps.omap.depth) { - // The depth output is always 2 registers after the last color output, and - // current_reg already contains one past the last color register. - OpStore(frag_depth, SafeGetRegister(current_reg + 1)); - } - } - } - - Expression Exit(Operation operation) { - PreExit(); - inside_branch = true; - if (conditional_branch_set) { - OpReturn(); - } else { - const Id dummy = OpLabel(); - OpBranch(dummy); - AddLabel(dummy); - OpReturn(); - AddLabel(); - } - return {}; - } - - Expression Discard(Operation operation) { - inside_branch = true; - if (conditional_branch_set) { - OpKill(); - } else { - const Id dummy = OpLabel(); - OpBranch(dummy); - AddLabel(dummy); - OpKill(); - AddLabel(); - } - return {}; - } - - Expression EmitVertex(Operation) { - OpEmitVertex(); - return {}; - } - - Expression EndPrimitive(Operation operation) { - OpEndPrimitive(); - return {}; - } - - Expression InvocationId(Operation) { - return {OpLoad(t_int, invocation_id), Type::Int}; - } - - Expression YNegate(Operation) { - LOG_WARNING(Render_Vulkan, "(STUBBED)"); - return {Constant(t_float, 1.0f), Type::Float}; - } - - template <u32 element> - Expression LocalInvocationId(Operation) { - const Id id = OpLoad(t_uint3, local_invocation_id); - return {OpCompositeExtract(t_uint, id, element), Type::Uint}; - } - - template <u32 element> - Expression WorkGroupId(Operation operation) { - const Id id = OpLoad(t_uint3, workgroup_id); - return {OpCompositeExtract(t_uint, id, element), Type::Uint}; - } - - Expression BallotThread(Operation operation) { - const Id predicate = AsBool(Visit(operation[0])); - const Id ballot = OpSubgroupBallotKHR(t_uint4, predicate); - - if (!device.IsWarpSizePotentiallyBiggerThanGuest()) { - // Guest-like devices can just return the first index. - return {OpCompositeExtract(t_uint, ballot, 0U), Type::Uint}; - } - - // The others will have to return what is local to the current thread. - // For instance a device with a warp size of 64 will return the upper uint when the current - // thread is 38. 
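// In other words, the ballot comes back as a uvec4 of 32-bit words and the shift below picks
// the word that contains the current invocation, e.g.:
//     thread_index = tid >> 5;                             // 38 >> 5 == 1
//     OpVectorExtractDynamic(ballot, thread_index)         // second (upper) 32-bit word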
- const Id tid = OpLoad(t_uint, thread_id); - const Id thread_index = OpShiftRightLogical(t_uint, tid, Constant(t_uint, 5)); - return {OpVectorExtractDynamic(t_uint, ballot, thread_index), Type::Uint}; - } - - template <Id (Module::*func)(Id, Id)> - Expression Vote(Operation operation) { - // TODO(Rodrigo): Handle devices with different warp sizes - const Id predicate = AsBool(Visit(operation[0])); - return {(this->*func)(t_bool, predicate), Type::Bool}; - } - - Expression ThreadId(Operation) { - return {OpLoad(t_uint, thread_id), Type::Uint}; - } - - template <std::size_t index> - Expression ThreadMask(Operation) { - // TODO(Rodrigo): Handle devices with different warp sizes - const Id mask = thread_masks[index]; - return {OpLoad(t_uint, AccessElement(t_in_uint, mask, 0)), Type::Uint}; - } - - Expression ShuffleIndexed(Operation operation) { - const Id value = AsFloat(Visit(operation[0])); - const Id index = AsUint(Visit(operation[1])); - return {OpSubgroupReadInvocationKHR(t_float, value, index), Type::Float}; - } - - Expression Barrier(Operation) { - if (!ir.IsDecompiled()) { - LOG_ERROR(Render_Vulkan, "OpBarrier used by shader is not decompiled"); - return {}; - } - - const auto scope = spv::Scope::Workgroup; - const auto memory = spv::Scope::Workgroup; - const auto semantics = - spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AcquireRelease; - OpControlBarrier(Constant(t_uint, static_cast<u32>(scope)), - Constant(t_uint, static_cast<u32>(memory)), - Constant(t_uint, static_cast<u32>(semantics))); - return {}; - } - - template <spv::Scope scope> - Expression MemoryBarrier(Operation) { - const auto semantics = - spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory | - spv::MemorySemanticsMask::WorkgroupMemory | - spv::MemorySemanticsMask::AtomicCounterMemory | spv::MemorySemanticsMask::ImageMemory; - - OpMemoryBarrier(Constant(t_uint, static_cast<u32>(scope)), - Constant(t_uint, static_cast<u32>(semantics))); - return {}; - } - - Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, std::string name) { - const Id id = OpVariable(type, storage); - Decorate(id, spv::Decoration::BuiltIn, static_cast<u32>(builtin)); - AddGlobalVariable(Name(id, std::move(name))); - interfaces.push_back(id); - return id; - } - - Id DeclareInputBuiltIn(spv::BuiltIn builtin, Id type, std::string name) { - return DeclareBuiltIn(builtin, spv::StorageClass::Input, type, std::move(name)); - } - - template <typename... Args> - Id AccessElement(Id pointer_type, Id composite, Args... 
elements_) { - std::vector<Id> members; - auto elements = {elements_...}; - for (const auto element : elements) { - members.push_back(Constant(t_uint, element)); - } - - return OpAccessChain(pointer_type, composite, members); - } - - Id As(Expression expr, Type wanted_type) { - switch (wanted_type) { - case Type::Bool: - return AsBool(expr); - case Type::Bool2: - return AsBool2(expr); - case Type::Float: - return AsFloat(expr); - case Type::Int: - return AsInt(expr); - case Type::Uint: - return AsUint(expr); - case Type::HalfFloat: - return AsHalfFloat(expr); - default: - UNREACHABLE(); - return expr.id; - } - } - - Id AsBool(Expression expr) { - ASSERT(expr.type == Type::Bool); - return expr.id; - } - - Id AsBool2(Expression expr) { - ASSERT(expr.type == Type::Bool2); - return expr.id; - } - - Id AsFloat(Expression expr) { - switch (expr.type) { - case Type::Float: - return expr.id; - case Type::Int: - case Type::Uint: - return OpBitcast(t_float, expr.id); - case Type::HalfFloat: - if (device.IsFloat16Supported()) { - return OpBitcast(t_float, expr.id); - } - return OpBitcast(t_float, OpPackHalf2x16(t_uint, expr.id)); - default: - UNREACHABLE(); - return expr.id; - } - } - - Id AsInt(Expression expr) { - switch (expr.type) { - case Type::Int: - return expr.id; - case Type::Float: - case Type::Uint: - return OpBitcast(t_int, expr.id); - case Type::HalfFloat: - if (device.IsFloat16Supported()) { - return OpBitcast(t_int, expr.id); - } - return OpPackHalf2x16(t_int, expr.id); - default: - UNREACHABLE(); - return expr.id; - } - } - - Id AsUint(Expression expr) { - switch (expr.type) { - case Type::Uint: - return expr.id; - case Type::Float: - case Type::Int: - return OpBitcast(t_uint, expr.id); - case Type::HalfFloat: - if (device.IsFloat16Supported()) { - return OpBitcast(t_uint, expr.id); - } - return OpPackHalf2x16(t_uint, expr.id); - default: - UNREACHABLE(); - return expr.id; - } - } - - Id AsHalfFloat(Expression expr) { - switch (expr.type) { - case Type::HalfFloat: - return expr.id; - case Type::Float: - case Type::Int: - case Type::Uint: - if (device.IsFloat16Supported()) { - return OpBitcast(t_half, expr.id); - } - return OpUnpackHalf2x16(t_half, AsUint(expr)); - default: - UNREACHABLE(); - return expr.id; - } - } - - Id GetHalfScalarFromFloat(Id value) { - if (device.IsFloat16Supported()) { - return OpFConvert(t_scalar_half, value); - } - return value; - } - - Id GetFloatFromHalfScalar(Id value) { - if (device.IsFloat16Supported()) { - return OpFConvert(t_float, value); - } - return value; - } - - AttributeType GetAttributeType(u32 location) const { - if (stage != ShaderType::Vertex) { - return {Type::Float, t_in_float, t_in_float4}; - } - switch (specialization.attribute_types.at(location)) { - case Maxwell::VertexAttribute::Type::SignedNorm: - case Maxwell::VertexAttribute::Type::UnsignedNorm: - case Maxwell::VertexAttribute::Type::UnsignedScaled: - case Maxwell::VertexAttribute::Type::SignedScaled: - case Maxwell::VertexAttribute::Type::Float: - return {Type::Float, t_in_float, t_in_float4}; - case Maxwell::VertexAttribute::Type::SignedInt: - return {Type::Int, t_in_int, t_in_int4}; - case Maxwell::VertexAttribute::Type::UnsignedInt: - return {Type::Uint, t_in_uint, t_in_uint4}; - default: - UNREACHABLE(); - return {Type::Float, t_in_float, t_in_float4}; - } - } - - Id GetTypeDefinition(Type type) const { - switch (type) { - case Type::Bool: - return t_bool; - case Type::Bool2: - return t_bool2; - case Type::Float: - return t_float; - case Type::Int: - return t_int; - case 
Type::Uint: - return t_uint; - case Type::HalfFloat: - return t_half; - default: - UNREACHABLE(); - return {}; - } - } - - std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const { - switch (type) { - case Type::Float: - return {t_float, t_float2, t_float3, t_float4}; - case Type::Int: - return {t_int, t_int2, t_int3, t_int4}; - case Type::Uint: - return {t_uint, t_uint2, t_uint3, t_uint4}; - default: - UNIMPLEMENTED(); - return {}; - } - } - - std::tuple<Id, Id> CreateFlowStack() { - // TODO(Rodrigo): Figure out the actual depth of the flow stack, for now it seems unlikely - // that shaders will use 20 nested SSYs and PBKs. - constexpr u32 FLOW_STACK_SIZE = 20; - constexpr auto storage_class = spv::StorageClass::Function; - - const Id flow_stack_type = TypeArray(t_uint, Constant(t_uint, FLOW_STACK_SIZE)); - const Id stack = OpVariable(TypePointer(storage_class, flow_stack_type), storage_class, - ConstantNull(flow_stack_type)); - const Id top = OpVariable(t_func_uint, storage_class, Constant(t_uint, 0)); - AddLocalVariable(stack); - AddLocalVariable(top); - return std::tie(stack, top); - } - - std::pair<Id, Id> GetFlowStack(Operation operation) { - const auto stack_class = std::get<MetaStackClass>(operation.GetMeta()); - switch (stack_class) { - case MetaStackClass::Ssy: - return {ssy_flow_stack, ssy_flow_stack_top}; - case MetaStackClass::Pbk: - return {pbk_flow_stack, pbk_flow_stack_top}; - } - UNREACHABLE(); - return {}; - } - - Id GetGlobalMemoryPointer(const GmemNode& gmem) { - const Id real = AsUint(Visit(gmem.GetRealAddress())); - const Id base = AsUint(Visit(gmem.GetBaseAddress())); - const Id diff = OpISub(t_uint, real, base); - const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2)); - const Id buffer = global_buffers.at(gmem.GetDescriptor()); - return OpAccessChain(t_gmem_uint, buffer, Constant(t_uint, 0), offset); - } - - Id GetSharedMemoryPointer(const SmemNode& smem) { - ASSERT(stage == ShaderType::Compute); - Id address = AsUint(Visit(smem.GetAddress())); - address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); - return OpAccessChain(t_smem_uint, shared_memory, address); - } - - static constexpr std::array operation_decompilers = { - &SPIRVDecompiler::Assign, - - &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float, - Type::Float>, - - &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFMul, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFDiv, Type::Float>, - &SPIRVDecompiler::Ternary<&Module::OpFma, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>, - &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>, - &SPIRVDecompiler::FCastHalf<0>, - &SPIRVDecompiler::FCastHalf<1>, - &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpSin, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpExp2, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpLog2, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpInverseSqrt, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpSqrt, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpRoundEven, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpFloor, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpCeil, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>, - 
&SPIRVDecompiler::Unary<&Module::OpConvertSToF, Type::Float, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>, - &SPIRVDecompiler::FSwizzleAdd, - - &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSDiv, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpSNegate, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpSAbs, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSMin, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSMax, Type::Int>, - - &SPIRVDecompiler::Unary<&Module::OpConvertFToS, Type::Int, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Int, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Int, Type::Int, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Int, Type::Int, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpShiftRightArithmetic, Type::Int, Type::Int, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpNot, Type::Int>, - &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Int>, - &SPIRVDecompiler::Ternary<&Module::OpBitFieldSExtract, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpFindSMsb, Type::Int>, - - &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpUDiv, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpUMin, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpUMax, Type::Uint>, - &SPIRVDecompiler::Unary<&Module::OpConvertFToU, Type::Uint, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Uint, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Uint>, - &SPIRVDecompiler::Unary<&Module::OpNot, Type::Uint>, - &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Uint>, - &SPIRVDecompiler::Ternary<&Module::OpBitFieldUExtract, Type::Uint>, - &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Uint>, - &SPIRVDecompiler::Unary<&Module::OpFindUMsb, Type::Uint>, - - &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFMul, Type::HalfFloat>, - &SPIRVDecompiler::Ternary<&Module::OpFma, Type::HalfFloat>, - &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>, - &SPIRVDecompiler::HNegate, - &SPIRVDecompiler::HClamp, - &SPIRVDecompiler::HCastFloat, - &SPIRVDecompiler::HUnpack, - &SPIRVDecompiler::HMergeF32, - &SPIRVDecompiler::HMergeHN<0>, - &SPIRVDecompiler::HMergeHN<1>, - &SPIRVDecompiler::HPack2, - - &SPIRVDecompiler::LogicalAssign, - &SPIRVDecompiler::Binary<&Module::OpLogicalAnd, Type::Bool>, - &SPIRVDecompiler::Binary<&Module::OpLogicalOr, Type::Bool>, - &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>, - &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>, - &SPIRVDecompiler::Binary<&Module::OpVectorExtractDynamic, Type::Bool, Type::Bool2, - Type::Uint>, - 
&SPIRVDecompiler::Unary<&Module::OpAll, Type::Bool, Type::Bool2>, - - &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::LogicalFOrdered, - &SPIRVDecompiler::LogicalFUnordered, - &SPIRVDecompiler::Binary<&Module::OpFUnordLessThan, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFUnordEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFUnordLessThanEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThan, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFUnordNotEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThanEqual, Type::Bool, Type::Float>, - - &SPIRVDecompiler::Binary<&Module::OpSLessThan, Type::Bool, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSLessThanEqual, Type::Bool, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSGreaterThan, Type::Bool, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSGreaterThanEqual, Type::Bool, Type::Int>, - - &SPIRVDecompiler::Binary<&Module::OpULessThan, Type::Bool, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpULessThanEqual, Type::Bool, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpUGreaterThan, Type::Bool, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpUGreaterThanEqual, Type::Bool, Type::Uint>, - - &SPIRVDecompiler::LogicalAddCarry, - - &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool2, Type::HalfFloat>, - // TODO(Rodrigo): Should these use the OpFUnord* variants? 
- &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool2, Type::HalfFloat>, - - &SPIRVDecompiler::Texture, - &SPIRVDecompiler::TextureLod, - &SPIRVDecompiler::TextureGather, - &SPIRVDecompiler::TextureQueryDimensions, - &SPIRVDecompiler::TextureQueryLod, - &SPIRVDecompiler::TexelFetch, - &SPIRVDecompiler::TextureGradient, - - &SPIRVDecompiler::ImageLoad, - &SPIRVDecompiler::ImageStore, - &SPIRVDecompiler::AtomicImage<&Module::OpAtomicIAdd>, - &SPIRVDecompiler::AtomicImage<&Module::OpAtomicAnd>, - &SPIRVDecompiler::AtomicImage<&Module::OpAtomicOr>, - &SPIRVDecompiler::AtomicImage<&Module::OpAtomicXor>, - &SPIRVDecompiler::AtomicImage<&Module::OpAtomicExchange>, - - &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicUMin>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicUMax>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>, - - &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicSMin>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicSMax>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>, - - &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicUMin>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicUMax>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>, - - &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicSMin>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicSMax>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>, - - &SPIRVDecompiler::Branch, - &SPIRVDecompiler::BranchIndirect, - &SPIRVDecompiler::PushFlowStack, - &SPIRVDecompiler::PopFlowStack, - &SPIRVDecompiler::Exit, - &SPIRVDecompiler::Discard, - - &SPIRVDecompiler::EmitVertex, - &SPIRVDecompiler::EndPrimitive, - - &SPIRVDecompiler::InvocationId, - &SPIRVDecompiler::YNegate, - &SPIRVDecompiler::LocalInvocationId<0>, - &SPIRVDecompiler::LocalInvocationId<1>, - &SPIRVDecompiler::LocalInvocationId<2>, - &SPIRVDecompiler::WorkGroupId<0>, - &SPIRVDecompiler::WorkGroupId<1>, - &SPIRVDecompiler::WorkGroupId<2>, - - &SPIRVDecompiler::BallotThread, - &SPIRVDecompiler::Vote<&Module::OpSubgroupAllKHR>, - &SPIRVDecompiler::Vote<&Module::OpSubgroupAnyKHR>, - &SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>, - - &SPIRVDecompiler::ThreadId, - &SPIRVDecompiler::ThreadMask<0>, // Eq - &SPIRVDecompiler::ThreadMask<1>, // Ge - &SPIRVDecompiler::ThreadMask<2>, // Gt - &SPIRVDecompiler::ThreadMask<3>, // Le - &SPIRVDecompiler::ThreadMask<4>, // Lt - &SPIRVDecompiler::ShuffleIndexed, - - &SPIRVDecompiler::Barrier, - &SPIRVDecompiler::MemoryBarrier<spv::Scope::Workgroup>, - 
&SPIRVDecompiler::MemoryBarrier<spv::Scope::Device>, - }; - static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); - - const Device& device; - const ShaderIR& ir; - const ShaderType stage; - const Tegra::Shader::Header header; - const Registry& registry; - const Specialization& specialization; - std::unordered_map<u8, VaryingTFB> transform_feedback; - - const Id t_void = Name(TypeVoid(), "void"); - - const Id t_bool = Name(TypeBool(), "bool"); - const Id t_bool2 = Name(TypeVector(t_bool, 2), "bool2"); - - const Id t_int = Name(TypeInt(32, true), "int"); - const Id t_int2 = Name(TypeVector(t_int, 2), "int2"); - const Id t_int3 = Name(TypeVector(t_int, 3), "int3"); - const Id t_int4 = Name(TypeVector(t_int, 4), "int4"); - - const Id t_uint = Name(TypeInt(32, false), "uint"); - const Id t_uint2 = Name(TypeVector(t_uint, 2), "uint2"); - const Id t_uint3 = Name(TypeVector(t_uint, 3), "uint3"); - const Id t_uint4 = Name(TypeVector(t_uint, 4), "uint4"); - - const Id t_float = Name(TypeFloat(32), "float"); - const Id t_float2 = Name(TypeVector(t_float, 2), "float2"); - const Id t_float3 = Name(TypeVector(t_float, 3), "float3"); - const Id t_float4 = Name(TypeVector(t_float, 4), "float4"); - - const Id t_prv_bool = Name(TypePointer(spv::StorageClass::Private, t_bool), "prv_bool"); - const Id t_prv_float = Name(TypePointer(spv::StorageClass::Private, t_float), "prv_float"); - - const Id t_func_uint = Name(TypePointer(spv::StorageClass::Function, t_uint), "func_uint"); - - const Id t_in_bool = Name(TypePointer(spv::StorageClass::Input, t_bool), "in_bool"); - const Id t_in_int = Name(TypePointer(spv::StorageClass::Input, t_int), "in_int"); - const Id t_in_int4 = Name(TypePointer(spv::StorageClass::Input, t_int4), "in_int4"); - const Id t_in_uint = Name(TypePointer(spv::StorageClass::Input, t_uint), "in_uint"); - const Id t_in_uint3 = Name(TypePointer(spv::StorageClass::Input, t_uint3), "in_uint3"); - const Id t_in_uint4 = Name(TypePointer(spv::StorageClass::Input, t_uint4), "in_uint4"); - const Id t_in_float = Name(TypePointer(spv::StorageClass::Input, t_float), "in_float"); - const Id t_in_float2 = Name(TypePointer(spv::StorageClass::Input, t_float2), "in_float2"); - const Id t_in_float3 = Name(TypePointer(spv::StorageClass::Input, t_float3), "in_float3"); - const Id t_in_float4 = Name(TypePointer(spv::StorageClass::Input, t_float4), "in_float4"); - - const Id t_out_int = Name(TypePointer(spv::StorageClass::Output, t_int), "out_int"); - - const Id t_out_float = Name(TypePointer(spv::StorageClass::Output, t_float), "out_float"); - const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4"); - - const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float); - const Id t_cbuf_std140 = Decorate( - Name(TypeArray(t_float4, Constant(t_uint, MaxConstBufferElements)), "CbufStd140Array"), - spv::Decoration::ArrayStride, 16U); - const Id t_cbuf_scalar = Decorate( - Name(TypeArray(t_float, Constant(t_uint, MaxConstBufferFloats)), "CbufScalarArray"), - spv::Decoration::ArrayStride, 4U); - const Id t_cbuf_std140_struct = MemberDecorate( - Decorate(TypeStruct(t_cbuf_std140), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); - const Id t_cbuf_scalar_struct = MemberDecorate( - Decorate(TypeStruct(t_cbuf_scalar), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); - const Id t_cbuf_std140_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_std140_struct); - const Id t_cbuf_scalar_ubo = 
TypePointer(spv::StorageClass::Uniform, t_cbuf_scalar_struct); - - Id t_smem_uint{}; - - const Id t_gmem_uint = TypePointer(spv::StorageClass::StorageBuffer, t_uint); - const Id t_gmem_array = - Name(Decorate(TypeRuntimeArray(t_uint), spv::Decoration::ArrayStride, 4U), "GmemArray"); - const Id t_gmem_struct = MemberDecorate( - Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); - const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct); - - const Id t_image_uint = TypePointer(spv::StorageClass::Image, t_uint); - - const Id v_float_zero = Constant(t_float, 0.0f); - const Id v_float_one = Constant(t_float, 1.0f); - const Id v_uint_zero = Constant(t_uint, 0); - - // Nvidia uses these defaults for varyings (e.g. position and generic attributes) - const Id v_varying_default = - ConstantComposite(t_float4, v_float_zero, v_float_zero, v_float_zero, v_float_one); - - const Id v_true = ConstantTrue(t_bool); - const Id v_false = ConstantFalse(t_bool); - - Id t_scalar_half{}; - Id t_half{}; - - Id out_vertex{}; - Id in_vertex{}; - std::map<u32, Id> registers; - std::map<u32, Id> custom_variables; - std::map<Tegra::Shader::Pred, Id> predicates; - std::map<u32, Id> flow_variables; - Id local_memory{}; - Id shared_memory{}; - std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{}; - std::map<Attribute::Index, Id> input_attributes; - std::unordered_map<u8, GenericVaryingDescription> output_attributes; - std::map<u32, Id> constant_buffers; - std::map<GlobalMemoryBase, Id> global_buffers; - std::map<u32, TexelBuffer> uniform_texels; - std::map<u32, SampledImage> sampled_images; - std::map<u32, StorageImage> images; - - std::array<Id, Maxwell::NumRenderTargets> frag_colors{}; - Id instance_index{}; - Id vertex_index{}; - Id base_instance{}; - Id base_vertex{}; - Id frag_depth{}; - Id frag_coord{}; - Id front_facing{}; - Id point_coord{}; - Id tess_level_outer{}; - Id tess_level_inner{}; - Id tess_coord{}; - Id invocation_id{}; - Id workgroup_id{}; - Id local_invocation_id{}; - Id thread_id{}; - std::array<Id, 5> thread_masks{}; // eq, ge, gt, le, lt - - VertexIndices in_indices; - VertexIndices out_indices; - - std::vector<Id> interfaces; - - Id jmp_to{}; - Id ssy_flow_stack_top{}; - Id pbk_flow_stack_top{}; - Id ssy_flow_stack{}; - Id pbk_flow_stack{}; - Id continue_label{}; - std::map<u32, Id> labels; - - bool conditional_branch_set{}; - bool inside_branch{}; -}; - -class ExprDecompiler { -public: - explicit ExprDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {} - - Id operator()(const ExprAnd& expr) { - const Id type_def = decomp.GetTypeDefinition(Type::Bool); - const Id op1 = Visit(expr.operand1); - const Id op2 = Visit(expr.operand2); - return decomp.OpLogicalAnd(type_def, op1, op2); - } - - Id operator()(const ExprOr& expr) { - const Id type_def = decomp.GetTypeDefinition(Type::Bool); - const Id op1 = Visit(expr.operand1); - const Id op2 = Visit(expr.operand2); - return decomp.OpLogicalOr(type_def, op1, op2); - } - - Id operator()(const ExprNot& expr) { - const Id type_def = decomp.GetTypeDefinition(Type::Bool); - const Id op1 = Visit(expr.operand1); - return decomp.OpLogicalNot(type_def, op1); - } - - Id operator()(const ExprPredicate& expr) { - const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate); - return decomp.OpLoad(decomp.t_bool, decomp.predicates.at(pred)); - } - - Id operator()(const ExprCondCode& expr) { - return decomp.AsBool(decomp.Visit(decomp.ir.GetConditionCode(expr.cc))); - } - - Id operator()(const 
ExprVar& expr) { - return decomp.OpLoad(decomp.t_bool, decomp.flow_variables.at(expr.var_index)); - } - - Id operator()(const ExprBoolean& expr) { - return expr.value ? decomp.v_true : decomp.v_false; - } - - Id operator()(const ExprGprEqual& expr) { - const Id target = decomp.Constant(decomp.t_uint, expr.value); - Id gpr = decomp.OpLoad(decomp.t_float, decomp.registers.at(expr.gpr)); - gpr = decomp.OpBitcast(decomp.t_uint, gpr); - return decomp.OpIEqual(decomp.t_bool, gpr, target); - } - - Id Visit(const Expr& node) { - return std::visit(*this, *node); - } - -private: - SPIRVDecompiler& decomp; -}; - -class ASTDecompiler { -public: - explicit ASTDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {} - - void operator()(const ASTProgram& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(const ASTIfThen& ast) { - ExprDecompiler expr_parser{decomp}; - const Id condition = expr_parser.Visit(ast.condition); - const Id then_label = decomp.OpLabel(); - const Id endif_label = decomp.OpLabel(); - decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone); - decomp.OpBranchConditional(condition, then_label, endif_label); - decomp.AddLabel(then_label); - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.OpBranch(endif_label); - decomp.AddLabel(endif_label); - } - - void operator()([[maybe_unused]] const ASTIfElse& ast) { - UNREACHABLE(); - } - - void operator()([[maybe_unused]] const ASTBlockEncoded& ast) { - UNREACHABLE(); - } - - void operator()(const ASTBlockDecoded& ast) { - decomp.VisitBasicBlock(ast.nodes); - } - - void operator()(const ASTVarSet& ast) { - ExprDecompiler expr_parser{decomp}; - const Id condition = expr_parser.Visit(ast.condition); - decomp.OpStore(decomp.flow_variables.at(ast.index), condition); - } - - void operator()([[maybe_unused]] const ASTLabel& ast) { - // Do nothing - } - - void operator()([[maybe_unused]] const ASTGoto& ast) { - UNREACHABLE(); - } - - void operator()(const ASTDoWhile& ast) { - const Id loop_label = decomp.OpLabel(); - const Id endloop_label = decomp.OpLabel(); - const Id loop_start_block = decomp.OpLabel(); - const Id loop_continue_block = decomp.OpLabel(); - current_loop_exit = endloop_label; - decomp.OpBranch(loop_label); - decomp.AddLabel(loop_label); - decomp.OpLoopMerge(endloop_label, loop_continue_block, spv::LoopControlMask::MaskNone); - decomp.OpBranch(loop_start_block); - decomp.AddLabel(loop_start_block); - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.OpBranch(loop_continue_block); - decomp.AddLabel(loop_continue_block); - ExprDecompiler expr_parser{decomp}; - const Id condition = expr_parser.Visit(ast.condition); - decomp.OpBranchConditional(condition, loop_label, endloop_label); - decomp.AddLabel(endloop_label); - } - - void operator()(const ASTReturn& ast) { - if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) { - ExprDecompiler expr_parser{decomp}; - const Id condition = expr_parser.Visit(ast.condition); - const Id then_label = decomp.OpLabel(); - const Id endif_label = decomp.OpLabel(); - decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone); - decomp.OpBranchConditional(condition, then_label, endif_label); - decomp.AddLabel(then_label); - if (ast.kills) { - decomp.OpKill(); - } else { - decomp.PreExit(); - decomp.OpReturn(); - } - 
decomp.AddLabel(endif_label); - } else { - const Id next_block = decomp.OpLabel(); - decomp.OpBranch(next_block); - decomp.AddLabel(next_block); - if (ast.kills) { - decomp.OpKill(); - } else { - decomp.PreExit(); - decomp.OpReturn(); - } - decomp.AddLabel(decomp.OpLabel()); - } - } - - void operator()(const ASTBreak& ast) { - if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) { - ExprDecompiler expr_parser{decomp}; - const Id condition = expr_parser.Visit(ast.condition); - const Id then_label = decomp.OpLabel(); - const Id endif_label = decomp.OpLabel(); - decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone); - decomp.OpBranchConditional(condition, then_label, endif_label); - decomp.AddLabel(then_label); - decomp.OpBranch(current_loop_exit); - decomp.AddLabel(endif_label); - } else { - const Id next_block = decomp.OpLabel(); - decomp.OpBranch(next_block); - decomp.AddLabel(next_block); - decomp.OpBranch(current_loop_exit); - decomp.AddLabel(decomp.OpLabel()); - } - } - - void Visit(const ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - } - -private: - SPIRVDecompiler& decomp; - Id current_loop_exit{}; -}; - -void SPIRVDecompiler::DecompileAST() { - const u32 num_flow_variables = ir.GetASTNumVariables(); - for (u32 i = 0; i < num_flow_variables; i++) { - const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); - Name(id, fmt::format("flow_var_{}", i)); - flow_variables.emplace(i, AddGlobalVariable(id)); - } - - DefinePrologue(); - - const ASTNode program = ir.GetASTProgram(); - ASTDecompiler decompiler{*this}; - decompiler.Visit(program); - - const Id next_block = OpLabel(); - OpBranch(next_block); - AddLabel(next_block); -} - -} // Anonymous namespace - -ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) { - ShaderEntries entries; - for (const auto& cbuf : ir.GetConstantBuffers()) { - entries.const_buffers.emplace_back(cbuf.second, cbuf.first); - } - for (const auto& [base, usage] : ir.GetGlobalMemory()) { - entries.global_buffers.emplace_back(GlobalBufferEntry{ - .cbuf_index = base.cbuf_index, - .cbuf_offset = base.cbuf_offset, - .is_written = usage.is_written, - }); - } - for (const auto& sampler : ir.GetSamplers()) { - if (sampler.is_buffer) { - entries.uniform_texels.emplace_back(sampler); - } else { - entries.samplers.emplace_back(sampler); - } - } - for (const auto& image : ir.GetImages()) { - if (image.type == Tegra::Shader::ImageType::TextureBuffer) { - entries.storage_texels.emplace_back(image); - } else { - entries.images.emplace_back(image); - } - } - for (const auto& attribute : ir.GetInputAttributes()) { - if (IsGenericAttribute(attribute)) { - entries.attributes.insert(GetGenericAttributeLocation(attribute)); - } - } - for (const auto& buffer : entries.const_buffers) { - entries.enabled_uniform_buffers |= 1U << buffer.GetIndex(); - } - entries.clip_distances = ir.GetClipDistances(); - entries.shader_length = ir.GetLength(); - entries.uses_warps = ir.UsesWarps(); - return entries; -} - -std::vector<u32> Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - ShaderType stage, const VideoCommon::Shader::Registry& registry, - const Specialization& specialization) { - return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble(); -} - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h deleted file mode 100644 index 5d94132a5..000000000 --- 
a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <array> -#include <set> -#include <vector> - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace Vulkan { - -class Device; - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using UniformTexelEntry = VideoCommon::Shader::SamplerEntry; -using SamplerEntry = VideoCommon::Shader::SamplerEntry; -using StorageTexelEntry = VideoCommon::Shader::ImageEntry; -using ImageEntry = VideoCommon::Shader::ImageEntry; - -constexpr u32 DESCRIPTOR_SET = 0; - -class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { -public: - explicit constexpr ConstBufferEntry(const ConstBuffer& entry_, u32 index_) - : ConstBuffer{entry_}, index{index_} {} - - constexpr u32 GetIndex() const { - return index; - } - -private: - u32 index{}; -}; - -struct GlobalBufferEntry { - u32 cbuf_index{}; - u32 cbuf_offset{}; - bool is_written{}; -}; - -struct ShaderEntries { - u32 NumBindings() const { - return static_cast<u32>(const_buffers.size() + global_buffers.size() + - uniform_texels.size() + samplers.size() + storage_texels.size() + - images.size()); - } - - std::vector<ConstBufferEntry> const_buffers; - std::vector<GlobalBufferEntry> global_buffers; - std::vector<UniformTexelEntry> uniform_texels; - std::vector<SamplerEntry> samplers; - std::vector<StorageTexelEntry> storage_texels; - std::vector<ImageEntry> images; - std::set<u32> attributes; - std::array<bool, Maxwell::NumClipDistances> clip_distances{}; - std::size_t shader_length{}; - u32 enabled_uniform_buffers{}; - bool uses_warps{}; -}; - -struct Specialization final { - u32 base_binding{}; - - // Compute specific - std::array<u32, 3> workgroup_size{}; - u32 shared_memory_size{}; - - // Graphics specific - std::optional<float> point_size; - std::bitset<Maxwell::NumVertexAttributes> enabled_attributes; - std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; - bool ndc_minus_one_to_one{}; - bool early_fragment_tests{}; - float alpha_test_ref{}; - Maxwell::ComparisonOp alpha_test_func{}; -}; -// Old gcc versions don't consider this trivially copyable. 
-// static_assert(std::is_trivially_copyable_v<Specialization>); - -struct SPIRVShader { - std::vector<u32> code; - ShaderEntries entries; -}; - -ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir); - -std::vector<u32> Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - Tegra::Engines::ShaderType stage, - const VideoCommon::Shader::Registry& registry, - const Specialization& specialization); - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 0412b5234..555b12ed7 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -91,7 +91,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem .flags = 0, .size = STREAM_BUFFER_SIZE, .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | - VK_BUFFER_USAGE_INDEX_BUFFER_BIT, + VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 956f86845..e3b7dd61c 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -29,9 +29,10 @@ using Flags = Maxwell3D::DirtyState::Flags; Flags MakeInvalidationFlags() { static constexpr int INVALIDATION_FLAGS[]{ - Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, - StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable, - DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, VertexBuffers, + Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, + StencilProperties, LineWidth, CullMode, DepthBoundsEnable, DepthTestEnable, + DepthWriteEnable, DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, + VertexBuffers, VertexInput, }; Flags flags{}; for (const int flag : INVALIDATION_FLAGS) { @@ -40,6 +41,12 @@ Flags MakeInvalidationFlags() { for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) { flags[index] = true; } + for (int index = VertexAttribute0; index <= VertexAttribute31; ++index) { + flags[index] = true; + } + for (int index = VertexBinding0; index <= VertexBinding31; ++index) { + flags[index] = true; + } return flags; } @@ -79,6 +86,11 @@ void SetupDirtyStencilProperties(Tables& tables) { table[OFF(stencil_back_func_mask)] = StencilProperties; } +void SetupDirtyLineWidth(Tables& tables) { + tables[0][OFF(line_width_smooth)] = LineWidth; + tables[0][OFF(line_width_aliased)] = LineWidth; +} + void SetupDirtyCullMode(Tables& tables) { auto& table = tables[0]; table[OFF(cull_face)] = CullMode; @@ -134,31 +146,38 @@ void SetupDirtyBlending(Tables& tables) { FillBlock(tables[0], OFF(independent_blend), NUM(independent_blend), Blending); } -void SetupDirtyInstanceDivisors(Tables& tables) { - static constexpr size_t divisor_offset = 3; - for (size_t index = 0; index < Regs::NumVertexArrays; ++index) { - tables[0][OFF(instanced_arrays) + index] = InstanceDivisors; - tables[0][OFF(vertex_array) + index * NUM(vertex_array[0]) + divisor_offset] = - InstanceDivisors; +void SetupDirtyViewportSwizzles(Tables& tables) { + static constexpr size_t swizzle_offset = 6; + for (size_t index = 0; index < Regs::NumViewports; ++index) { + tables[0][OFF(viewport_transform) + index * 
NUM(viewport_transform[0]) + swizzle_offset] = + ViewportSwizzles; } } void SetupDirtyVertexAttributes(Tables& tables) { - FillBlock(tables[0], OFF(vertex_attrib_format), NUM(vertex_attrib_format), VertexAttributes); + for (size_t i = 0; i < Regs::NumVertexAttributes; ++i) { + const size_t offset = OFF(vertex_attrib_format) + i * NUM(vertex_attrib_format[0]); + FillBlock(tables[0], offset, NUM(vertex_attrib_format[0]), VertexAttribute0 + i); + } + FillBlock(tables[1], OFF(vertex_attrib_format), Regs::NumVertexAttributes, VertexInput); } -void SetupDirtyViewportSwizzles(Tables& tables) { - static constexpr size_t swizzle_offset = 6; - for (size_t index = 0; index < Regs::NumViewports; ++index) { - tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] = - ViewportSwizzles; +void SetupDirtyVertexBindings(Tables& tables) { + // Do NOT include stride here, it's implicit in VertexBuffer + static constexpr size_t divisor_offset = 3; + for (size_t i = 0; i < Regs::NumVertexArrays; ++i) { + const u8 flag = static_cast<u8>(VertexBinding0 + i); + tables[0][OFF(instanced_arrays) + i] = VertexInput; + tables[1][OFF(instanced_arrays) + i] = flag; + tables[0][OFF(vertex_array) + i * NUM(vertex_array[0]) + divisor_offset] = VertexInput; + tables[1][OFF(vertex_array) + i * NUM(vertex_array[0]) + divisor_offset] = flag; } } } // Anonymous namespace StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} { - auto& tables = gpu.Maxwell3D().dirty.tables; + auto& tables{gpu.Maxwell3D().dirty.tables}; SetupDirtyFlags(tables); SetupDirtyViewports(tables); SetupDirtyScissors(tables); @@ -166,6 +185,7 @@ StateTracker::StateTracker(Tegra::GPU& gpu) SetupDirtyBlendConstants(tables); SetupDirtyDepthBounds(tables); SetupDirtyStencilProperties(tables); + SetupDirtyLineWidth(tables); SetupDirtyCullMode(tables); SetupDirtyDepthBoundsEnable(tables); SetupDirtyDepthTestEnable(tables); @@ -175,9 +195,9 @@ StateTracker::StateTracker(Tegra::GPU& gpu) SetupDirtyStencilOp(tables); SetupDirtyStencilTestEnable(tables); SetupDirtyBlending(tables); - SetupDirtyInstanceDivisors(tables); - SetupDirtyVertexAttributes(tables); SetupDirtyViewportSwizzles(tables); + SetupDirtyVertexAttributes(tables); + SetupDirtyVertexBindings(tables); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 84e918a71..5f78f6950 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -19,12 +19,19 @@ namespace Dirty { enum : u8 { First = VideoCommon::Dirty::LastCommonEntry, + VertexInput, + VertexAttribute0, + VertexAttribute31 = VertexAttribute0 + 31, + VertexBinding0, + VertexBinding31 = VertexBinding0 + 31, + Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, StencilProperties, + LineWidth, CullMode, DepthBoundsEnable, @@ -36,11 +43,9 @@ enum : u8 { StencilTestEnable, Blending, - InstanceDivisors, - VertexAttributes, ViewportSwizzles, - Last + Last, }; static_assert(Last <= std::numeric_limits<u8>::max()); @@ -89,6 +94,10 @@ public: return Exchange(Dirty::StencilProperties, false); } + bool TouchLineWidth() const { + return Exchange(Dirty::LineWidth, false); + } + bool TouchCullMode() { return Exchange(Dirty::CullMode, false); } diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index dfd5c65ba..d990eefba 100644 --- 
a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -65,6 +65,9 @@ VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKSchedul VKSwapchain::~VKSwapchain() = default; void VKSwapchain::Create(u32 width, u32 height, bool srgb) { + is_outdated = false; + is_suboptimal = false; + const auto physical_device = device.GetPhysical(); const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)}; if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) { @@ -82,21 +85,31 @@ void VKSwapchain::Create(u32 width, u32 height, bool srgb) { resource_ticks.resize(image_count); } -bool VKSwapchain::AcquireNextImage() { - const VkResult result = - device.GetLogical().AcquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(), - *present_semaphores[frame_index], {}, &image_index); - +void VKSwapchain::AcquireNextImage() { + const VkResult result = device.GetLogical().AcquireNextImageKHR( + *swapchain, std::numeric_limits<u64>::max(), *present_semaphores[frame_index], + VK_NULL_HANDLE, &image_index); + switch (result) { + case VK_SUCCESS: + break; + case VK_SUBOPTIMAL_KHR: + is_suboptimal = true; + break; + case VK_ERROR_OUT_OF_DATE_KHR: + is_outdated = true; + break; + default: + LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned {}", vk::ToString(result)); + break; + } scheduler.Wait(resource_ticks[image_index]); - return result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR; + resource_ticks[image_index] = scheduler.CurrentTick(); } -bool VKSwapchain::Present(VkSemaphore render_semaphore) { +void VKSwapchain::Present(VkSemaphore render_semaphore) { const VkSemaphore present_semaphore{*present_semaphores[frame_index]}; const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore}; const auto present_queue{device.GetPresentQueue()}; - bool recreated = false; - const VkPresentInfoKHR present_info{ .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, .pNext = nullptr, @@ -107,7 +120,6 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore) { .pImageIndices = &image_index, .pResults = nullptr, }; - switch (const VkResult result = present_queue.Present(present_info)) { case VK_SUCCESS: break; @@ -115,24 +127,16 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore) { LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain"); break; case VK_ERROR_OUT_OF_DATE_KHR: - if (current_width > 0 && current_height > 0) { - Create(current_width, current_height, current_srgb); - recreated = true; - } + is_outdated = true; break; default: LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", vk::ToString(result)); break; } - - resource_ticks[image_index] = scheduler.CurrentTick(); - frame_index = (frame_index + 1) % static_cast<u32>(image_count); - return recreated; -} - -bool VKSwapchain::HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const { - // TODO(Rodrigo): Handle framebuffer pixel format changes - return framebuffer.width != current_width || framebuffer.height != current_height; + ++frame_index; + if (frame_index >= image_count) { + frame_index = 0; + } } void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, @@ -148,7 +152,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) { requested_image_count = capabilities.maxImageCount; } - VkSwapchainCreateInfoKHR swapchain_ci{ .sType = 
VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, .pNext = nullptr, @@ -169,7 +172,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, .clipped = VK_FALSE, .oldSwapchain = nullptr, }; - const u32 graphics_family{device.GetGraphicsFamily()}; const u32 present_family{device.GetPresentFamily()}; const std::array<u32, 2> queue_indices{graphics_family, present_family}; @@ -178,7 +180,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size()); swapchain_ci.pQueueFamilyIndices = queue_indices.data(); } - // Request the size again to reduce the possibility of a TOCTOU race condition. const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface); swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height); @@ -186,8 +187,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, swapchain = device.GetLogical().CreateSwapchainKHR(swapchain_ci); extent = swapchain_ci.imageExtent; - current_width = extent.width; - current_height = extent.height; current_srgb = srgb; images = swapchain.GetImages(); @@ -197,8 +196,8 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, void VKSwapchain::CreateSemaphores() { present_semaphores.resize(image_count); - std::generate(present_semaphores.begin(), present_semaphores.end(), - [this] { return device.GetLogical().CreateSemaphore(); }); + std::ranges::generate(present_semaphores, + [this] { return device.GetLogical().CreateSemaphore(); }); } void VKSwapchain::CreateImageViews() { diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index adc8d27cf..35c2cdc14 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -28,14 +28,25 @@ public: void Create(u32 width, u32 height, bool srgb); /// Acquires the next image in the swapchain, waits as needed. - bool AcquireNextImage(); + void AcquireNextImage(); - /// Presents the rendered image to the swapchain. Returns true when the swapchains had to be - /// recreated. Takes responsability for the ownership of fence. - bool Present(VkSemaphore render_semaphore); + /// Presents the rendered image to the swapchain. + void Present(VkSemaphore render_semaphore); - /// Returns true when the framebuffer layout has changed. - bool HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const; + /// Returns true when the color space has changed. + bool HasColorSpaceChanged(bool is_srgb) const { + return current_srgb != is_srgb; + } + + /// Returns true when the swapchain is outdated. + bool IsOutDated() const { + return is_outdated; + } + + /// Returns true when the swapchain is suboptimal. 
+ bool IsSubOptimal() const { + return is_suboptimal; + } VkExtent2D GetSize() const { return extent; @@ -61,10 +72,6 @@ public: return image_format; } - bool GetSrgbState() const { - return current_srgb; - } - private: void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height, bool srgb); @@ -92,9 +99,9 @@ private: VkFormat image_format{}; VkExtent2D extent{}; - u32 current_width{}; - u32 current_height{}; bool current_srgb{}; + bool is_outdated{}; + bool is_suboptimal{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 88ccf96f5..8e029bcb3 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -15,6 +15,7 @@ #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" @@ -34,19 +35,6 @@ using VideoCommon::SubresourceRange; using VideoCore::Surface::IsPixelFormatASTC; namespace { - -constexpr std::array ATTACHMENT_REFERENCES{ - VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL}, -}; - constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { if (color == std::array<float, 4>{0, 0, 0, 0}) { return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; @@ -174,25 +162,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info)); } -[[nodiscard]] vk::Buffer MakeBuffer(const Device& device, const ImageInfo& info) { - if (info.type != ImageType::Buffer) { - return vk::Buffer{}; - } - const size_t bytes_per_block = VideoCore::Surface::BytesPerBlock(info.format); - return device.GetLogical().CreateBuffer(VkBufferCreateInfo{ - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .size = info.size.width * bytes_per_block, - .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - }); -} - [[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) { switch (VideoCore::Surface::GetFormatType(format)) { case VideoCore::Surface::SurfaceType::ColorTexture: @@ -226,23 +195,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { } } -[[nodiscard]] VkAttachmentDescription AttachmentDescription(const Device& device, - const ImageView* image_view) { - using MaxwellToVK::SurfaceFormat; - const PixelFormat pixel_format = image_view->format; - return VkAttachmentDescription{ - .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, - .format = SurfaceFormat(device, 
FormatType::Optimal, true, pixel_format).format, - .samples = image_view->Samples(), - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, - .initialLayout = VK_IMAGE_LAYOUT_GENERAL, - .finalLayout = VK_IMAGE_LAYOUT_GENERAL, - }; -} - [[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) { switch (swizzle) { case SwizzleSource::Zero: @@ -263,6 +215,30 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { return VK_COMPONENT_SWIZZLE_ZERO; } +[[nodiscard]] VkImageViewType ImageViewType(Shader::TextureType type) { + switch (type) { + case Shader::TextureType::Color1D: + return VK_IMAGE_VIEW_TYPE_1D; + case Shader::TextureType::Color2D: + return VK_IMAGE_VIEW_TYPE_2D; + case Shader::TextureType::ColorCube: + return VK_IMAGE_VIEW_TYPE_CUBE; + case Shader::TextureType::Color3D: + return VK_IMAGE_VIEW_TYPE_3D; + case Shader::TextureType::ColorArray1D: + return VK_IMAGE_VIEW_TYPE_1D_ARRAY; + case Shader::TextureType::ColorArray2D: + return VK_IMAGE_VIEW_TYPE_2D_ARRAY; + case Shader::TextureType::ColorArrayCube: + return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; + case Shader::TextureType::Buffer: + UNREACHABLE_MSG("Texture buffers can't be image views"); + return VK_IMAGE_VIEW_TYPE_1D; + } + UNREACHABLE_MSG("Invalid image view type={}", type); + return VK_IMAGE_VIEW_TYPE_2D; +} + [[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type) { switch (type) { case VideoCommon::ImageViewType::e1D: @@ -280,7 +256,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { case VideoCommon::ImageViewType::CubeArray: return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; case VideoCommon::ImageViewType::Rect: - LOG_WARNING(Render_Vulkan, "Unnormalized image view type not supported"); + UNIMPLEMENTED_MSG("Rect image view"); return VK_IMAGE_VIEW_TYPE_2D; case VideoCommon::ImageViewType::Buffer: UNREACHABLE_MSG("Texture buffers can't be image views"); @@ -327,7 +303,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { }; } -[[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( +[[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { std::vector<VkBufferCopy> result(copies.size()); std::ranges::transform( @@ -587,6 +563,28 @@ struct RangedBarrierRange { } }; +[[nodiscard]] VkFormat Format(Shader::ImageFormat format) { + switch (format) { + case Shader::ImageFormat::Typeless: + break; + case Shader::ImageFormat::R8_SINT: + return VK_FORMAT_R8_SINT; + case Shader::ImageFormat::R8_UINT: + return VK_FORMAT_R8_UINT; + case Shader::ImageFormat::R16_UINT: + return VK_FORMAT_R16_UINT; + case Shader::ImageFormat::R16_SINT: + return VK_FORMAT_R16_SINT; + case Shader::ImageFormat::R32_UINT: + return VK_FORMAT_R32_UINT; + case Shader::ImageFormat::R32G32_UINT: + return VK_FORMAT_R32G32_UINT; + case Shader::ImageFormat::R32G32B32A32_UINT: + return VK_FORMAT_R32G32B32A32_UINT; + } + UNREACHABLE_MSG("Invalid image format={}", format); + return VK_FORMAT_R32_UINT; +} } // Anonymous namespace void TextureCacheRuntime::Finish() { @@ -625,7 +623,7 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst return; } } - ASSERT(src.ImageFormat() == dst.ImageFormat()); + ASSERT(src.format == dst.format); ASSERT(!(is_dst_msaa && !is_src_msaa)); ASSERT(operation == 
Fermi2D::Operation::SrcCopy); @@ -842,13 +840,9 @@ u64 TextureCacheRuntime::GetDeviceLocalMemory() const { Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler}, - image(MakeImage(runtime.device, info)), buffer(MakeBuffer(runtime.device, info)), + image(MakeImage(runtime.device, info)), + commit(runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal)), aspect_mask(ImageAspectMask(info.format)) { - if (image) { - commit = runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal); - } else { - commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); - } if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { if (Settings::values.accelerate_astc.GetValue()) { flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; @@ -857,11 +851,7 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ } } if (runtime.device.HasDebuggingToolAttached()) { - if (image) { - image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); - } else { - buffer.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); - } + image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); } static constexpr VkImageViewUsageCreateInfo storage_image_view_usage_create_info{ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, @@ -913,19 +903,6 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImag }); } -void Image::UploadMemory(const StagingBufferRef& map, - std::span<const VideoCommon::BufferCopy> copies) { - // TODO: Move this to another API - scheduler->RequestOutsideRenderPassOperationContext(); - std::vector vk_copies = TransformBufferCopies(copies, map.offset); - const VkBuffer src_buffer = map.buffer; - const VkBuffer dst_buffer = *buffer; - scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { - // TODO: Barriers - cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies); - }); -} - void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); scheduler->RequestOutsideRenderPassOperationContext(); @@ -984,8 +961,9 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferIm ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, ImageId image_id_, Image& image) : VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device}, - image_handle{image.Handle()}, image_format{image.info.format}, samples{ConvertSampleCount( - image.info.num_samples)} { + image_handle{image.Handle()}, samples{ConvertSampleCount(image.info.num_samples)} { + using Shader::TextureType; + const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info); std::array<SwizzleSource, 4> swizzle{ SwizzleSource::R, @@ -1023,57 +1001,54 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI }, .subresourceRange = MakeSubresourceRange(aspect_mask, info.range), }; - const auto create = [&](VideoCommon::ImageViewType view_type, std::optional<u32> num_layers) { + const auto create = [&](TextureType tex_type, std::optional<u32> num_layers) { VkImageViewCreateInfo ci{create_info}; - ci.viewType = ImageViewType(view_type); + ci.viewType = ImageViewType(tex_type); if (num_layers) { ci.subresourceRange.layerCount = *num_layers; } vk::ImageView handle = 
device->GetLogical().CreateImageView(ci); if (device->HasDebuggingToolAttached()) { - handle.SetObjectNameEXT(VideoCommon::Name(*this, view_type).c_str()); + handle.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); } - image_views[static_cast<size_t>(view_type)] = std::move(handle); + image_views[static_cast<size_t>(tex_type)] = std::move(handle); }; switch (info.type) { case VideoCommon::ImageViewType::e1D: case VideoCommon::ImageViewType::e1DArray: - create(VideoCommon::ImageViewType::e1D, 1); - create(VideoCommon::ImageViewType::e1DArray, std::nullopt); - render_target = Handle(VideoCommon::ImageViewType::e1DArray); + create(TextureType::Color1D, 1); + create(TextureType::ColorArray1D, std::nullopt); + render_target = Handle(TextureType::ColorArray1D); break; case VideoCommon::ImageViewType::e2D: case VideoCommon::ImageViewType::e2DArray: - create(VideoCommon::ImageViewType::e2D, 1); - create(VideoCommon::ImageViewType::e2DArray, std::nullopt); - render_target = Handle(VideoCommon::ImageViewType::e2DArray); + create(TextureType::Color2D, 1); + create(TextureType::ColorArray2D, std::nullopt); + render_target = Handle(Shader::TextureType::ColorArray2D); break; case VideoCommon::ImageViewType::e3D: - create(VideoCommon::ImageViewType::e3D, std::nullopt); - render_target = Handle(VideoCommon::ImageViewType::e3D); + create(TextureType::Color3D, std::nullopt); + render_target = Handle(Shader::TextureType::Color3D); break; case VideoCommon::ImageViewType::Cube: case VideoCommon::ImageViewType::CubeArray: - create(VideoCommon::ImageViewType::Cube, 6); - create(VideoCommon::ImageViewType::CubeArray, std::nullopt); + create(TextureType::ColorCube, 6); + create(TextureType::ColorArrayCube, std::nullopt); break; case VideoCommon::ImageViewType::Rect: UNIMPLEMENTED(); break; case VideoCommon::ImageViewType::Buffer: - buffer_view = device->GetLogical().CreateBufferView(VkBufferViewCreateInfo{ - .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .buffer = image.Buffer(), - .format = format_info.format, - .offset = 0, // TODO: Redesign buffer cache to support this - .range = image.guest_size_bytes, - }); + UNREACHABLE(); break; } } +ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, + const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_) + : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_}, + buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} + ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params) : VideoCommon::ImageViewBase{params} {} @@ -1081,7 +1056,8 @@ VkImageView ImageView::DepthView() { if (depth_view) { return *depth_view; } - depth_view = MakeDepthStencilView(VK_IMAGE_ASPECT_DEPTH_BIT); + const auto& info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format); + depth_view = MakeView(info.format, VK_IMAGE_ASPECT_DEPTH_BIT); return *depth_view; } @@ -1089,18 +1065,38 @@ VkImageView ImageView::StencilView() { if (stencil_view) { return *stencil_view; } - stencil_view = MakeDepthStencilView(VK_IMAGE_ASPECT_STENCIL_BIT); + const auto& info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format); + stencil_view = MakeView(info.format, VK_IMAGE_ASPECT_STENCIL_BIT); return *stencil_view; } -vk::ImageView ImageView::MakeDepthStencilView(VkImageAspectFlags aspect_mask) { +VkImageView ImageView::StorageView(Shader::TextureType texture_type, + Shader::ImageFormat image_format) { + if (image_format == Shader::ImageFormat::Typeless) { + 
return Handle(texture_type); + } + const bool is_signed{image_format == Shader::ImageFormat::R8_SINT || + image_format == Shader::ImageFormat::R16_SINT}; + if (!storage_views) { + storage_views = std::make_unique<StorageViews>(); + } + auto& views{is_signed ? storage_views->signeds : storage_views->unsigneds}; + auto& view{views[static_cast<size_t>(texture_type)]}; + if (view) { + return *view; + } + view = MakeView(Format(image_format), VK_IMAGE_ASPECT_COLOR_BIT); + return *view; +} + +vk::ImageView ImageView::MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask) { return device->GetLogical().CreateImageView({ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .pNext = nullptr, .flags = 0, .image = image_handle, .viewType = ImageViewType(type), - .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format).format, + .format = vk_format, .components{ .r = VK_COMPONENT_SWIZZLE_IDENTITY, .g = VK_COMPONENT_SWIZZLE_IDENTITY, @@ -1164,7 +1160,6 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { - std::vector<VkAttachmentDescription> descriptions; std::vector<VkImageView> attachments; RenderPassKey renderpass_key{}; s32 num_layers = 1; @@ -1175,7 +1170,6 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM renderpass_key.color_formats[index] = PixelFormat::Invalid; continue; } - descriptions.push_back(AttachmentDescription(runtime.device, color_buffer)); attachments.push_back(color_buffer->RenderTarget()); renderpass_key.color_formats[index] = color_buffer->format; num_layers = std::max(num_layers, color_buffer->range.extent.layers); @@ -1185,10 +1179,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM ++num_images; } const size_t num_colors = attachments.size(); - const VkAttachmentReference* depth_attachment = - depth_buffer ? &ATTACHMENT_REFERENCES[num_colors] : nullptr; if (depth_buffer) { - descriptions.push_back(AttachmentDescription(runtime.device, depth_buffer)); attachments.push_back(depth_buffer->RenderTarget()); renderpass_key.depth_format = depth_buffer->format; num_layers = std::max(num_layers, depth_buffer->range.extent.layers); @@ -1201,40 +1192,14 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM } renderpass_key.samples = samples; - const auto& device = runtime.device.GetLogical(); - const auto [cache_pair, is_new] = runtime.renderpass_cache.try_emplace(renderpass_key); - if (is_new) { - const VkSubpassDescription subpass{ - .flags = 0, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .inputAttachmentCount = 0, - .pInputAttachments = nullptr, - .colorAttachmentCount = static_cast<u32>(num_colors), - .pColorAttachments = num_colors != 0 ? 
ATTACHMENT_REFERENCES.data() : nullptr, - .pResolveAttachments = nullptr, - .pDepthStencilAttachment = depth_attachment, - .preserveAttachmentCount = 0, - .pPreserveAttachments = nullptr, - }; - cache_pair->second = device.CreateRenderPass(VkRenderPassCreateInfo{ - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .attachmentCount = static_cast<u32>(descriptions.size()), - .pAttachments = descriptions.data(), - .subpassCount = 1, - .pSubpasses = &subpass, - .dependencyCount = 0, - .pDependencies = nullptr, - }); - } - renderpass = *cache_pair->second; + renderpass = runtime.render_pass_cache.Get(renderpass_key); + render_area = VkExtent2D{ .width = key.size.width, .height = key.size.height, }; num_color_buffers = static_cast<u32>(num_colors); - framebuffer = device.CreateFramebuffer(VkFramebufferCreateInfo{ + framebuffer = runtime.device.GetLogical().CreateFramebuffer({ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 172bcdf98..0b73d55f8 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -7,6 +7,7 @@ #include <compare> #include <span> +#include "shader_recompiler/shader_info.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/texture_cache/texture_cache.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" @@ -26,35 +27,10 @@ class Device; class Image; class ImageView; class Framebuffer; +class RenderPassCache; class StagingBufferPool; class VKScheduler; -struct RenderPassKey { - constexpr auto operator<=>(const RenderPassKey&) const noexcept = default; - - std::array<PixelFormat, NUM_RT> color_formats; - PixelFormat depth_format; - VkSampleCountFlagBits samples; -}; - -} // namespace Vulkan - -namespace std { -template <> -struct hash<Vulkan::RenderPassKey> { - [[nodiscard]] constexpr size_t operator()(const Vulkan::RenderPassKey& key) const noexcept { - size_t value = static_cast<size_t>(key.depth_format) << 48; - value ^= static_cast<size_t>(key.samples) << 52; - for (size_t i = 0; i < key.color_formats.size(); ++i) { - value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6); - } - return value; - } -}; -} // namespace std - -namespace Vulkan { - struct TextureCacheRuntime { const Device& device; VKScheduler& scheduler; @@ -62,13 +38,13 @@ struct TextureCacheRuntime { StagingBufferPool& staging_buffer_pool; BlitImageHelper& blit_image_helper; ASTCDecoderPass& astc_decoder_pass; - std::unordered_map<RenderPassKey, vk::RenderPass> renderpass_cache{}; + RenderPassCache& render_pass_cache; void Finish(); - [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); + StagingBufferRef UploadStagingBuffer(size_t size); - [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); + StagingBufferRef DownloadStagingBuffer(size_t size); void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, const Region2D& dst_region, const Region2D& src_region, @@ -79,7 +55,7 @@ struct TextureCacheRuntime { void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view); - [[nodiscard]] bool CanAccelerateImageUpload(Image&) const noexcept { + bool CanAccelerateImageUpload(Image&) const noexcept { return false; } @@ -117,8 +93,6 @@ public: void UploadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferImageCopy> copies); - void 
UploadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferCopy> copies); - void DownloadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferImageCopy> copies); @@ -126,10 +100,6 @@ public: return *image; } - [[nodiscard]] VkBuffer Buffer() const noexcept { - return *buffer; - } - [[nodiscard]] VkImageAspectFlags AspectMask() const noexcept { return aspect_mask; } @@ -146,7 +116,6 @@ public: private: VKScheduler* scheduler; vk::Image image; - vk::Buffer buffer; MemoryCommit commit; vk::ImageView image_view; std::vector<vk::ImageView> storage_image_views; @@ -157,18 +126,19 @@ private: class ImageView : public VideoCommon::ImageViewBase { public: explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&, + const VideoCommon::ImageViewInfo&, GPUVAddr); explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); [[nodiscard]] VkImageView DepthView(); [[nodiscard]] VkImageView StencilView(); - [[nodiscard]] VkImageView Handle(VideoCommon::ImageViewType query_type) const noexcept { - return *image_views[static_cast<size_t>(query_type)]; - } + [[nodiscard]] VkImageView StorageView(Shader::TextureType texture_type, + Shader::ImageFormat image_format); - [[nodiscard]] VkBufferView BufferView() const noexcept { - return *buffer_view; + [[nodiscard]] VkImageView Handle(Shader::TextureType texture_type) const noexcept { + return *image_views[static_cast<size_t>(texture_type)]; } [[nodiscard]] VkImage ImageHandle() const noexcept { @@ -179,26 +149,36 @@ public: return render_target; } - [[nodiscard]] PixelFormat ImageFormat() const noexcept { - return image_format; - } - [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept { return samples; } + [[nodiscard]] GPUVAddr GpuAddr() const noexcept { + return gpu_addr; + } + + [[nodiscard]] u32 BufferSize() const noexcept { + return buffer_size; + } + private: - [[nodiscard]] vk::ImageView MakeDepthStencilView(VkImageAspectFlags aspect_mask); + struct StorageViews { + std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> signeds; + std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> unsigneds; + }; + + [[nodiscard]] vk::ImageView MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask); const Device* device = nullptr; - std::array<vk::ImageView, VideoCommon::NUM_IMAGE_VIEW_TYPES> image_views; + std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> image_views; + std::unique_ptr<StorageViews> storage_views; vk::ImageView depth_view; vk::ImageView stencil_view; - vk::BufferView buffer_view; VkImage image_handle = VK_NULL_HANDLE; VkImageView render_target = VK_NULL_HANDLE; - PixelFormat image_format = PixelFormat::Invalid; VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; + GPUVAddr gpu_addr = 0; + u32 buffer_size = 0; }; class ImageAlloc : public VideoCommon::ImageAllocBase {}; diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index dc45fdcb1..0df3a7fe9 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp @@ -15,7 +15,9 @@ namespace Vulkan { VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const Device& device_, VKScheduler& scheduler_) - : device{device_}, scheduler{scheduler_} {} + : device{device_}, scheduler{scheduler_} { + payload_cursor = payload.data(); +} VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default; @@ 
-36,13 +38,4 @@ void VKUpdateDescriptorQueue::Acquire() { upload_start = payload_cursor; } -void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template, - VkDescriptorSet set) { - const void* const data = upload_start; - const vk::Device* const logical = &device.GetLogical(); - scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) { - logical->UpdateDescriptorSet(set, update_template, data); - }); -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index d35e77c44..d7de4c490 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -39,7 +39,9 @@ public: void Acquire(); - void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set); + const DescriptorUpdateEntry* UpdateData() const noexcept { + return upload_start; + } void AddSampledImage(VkImageView image_view, VkSampler sampler) { *(payload_cursor++) = VkDescriptorImageInfo{ diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp deleted file mode 100644 index db11144c7..000000000 --- a/src/video_core/shader/ast.cpp +++ /dev/null @@ -1,752 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <string> -#include <string_view> - -#include <fmt/format.h> - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/expr.h" - -namespace VideoCommon::Shader { - -ASTZipper::ASTZipper() = default; - -void ASTZipper::Init(const ASTNode new_first, const ASTNode parent) { - ASSERT(new_first->manager == nullptr); - first = new_first; - last = new_first; - - ASTNode current = first; - while (current) { - current->manager = this; - current->parent = parent; - last = current; - current = current->next; - } -} - -void ASTZipper::PushBack(const ASTNode new_node) { - ASSERT(new_node->manager == nullptr); - new_node->previous = last; - if (last) { - last->next = new_node; - } - new_node->next.reset(); - last = new_node; - if (!first) { - first = new_node; - } - new_node->manager = this; -} - -void ASTZipper::PushFront(const ASTNode new_node) { - ASSERT(new_node->manager == nullptr); - new_node->previous.reset(); - new_node->next = first; - if (first) { - first->previous = new_node; - } - if (last == first) { - last = new_node; - } - first = new_node; - new_node->manager = this; -} - -void ASTZipper::InsertAfter(const ASTNode new_node, const ASTNode at_node) { - ASSERT(new_node->manager == nullptr); - if (!at_node) { - PushFront(new_node); - return; - } - const ASTNode next = at_node->next; - if (next) { - next->previous = new_node; - } - new_node->previous = at_node; - if (at_node == last) { - last = new_node; - } - new_node->next = next; - at_node->next = new_node; - new_node->manager = this; -} - -void ASTZipper::InsertBefore(const ASTNode new_node, const ASTNode at_node) { - ASSERT(new_node->manager == nullptr); - if (!at_node) { - PushBack(new_node); - return; - } - const ASTNode previous = at_node->previous; - if (previous) { - previous->next = new_node; - } - new_node->next = at_node; - if (at_node == first) { - first = new_node; - } - new_node->previous = previous; - at_node->previous = new_node; - new_node->manager = this; -} - -void ASTZipper::DetachTail(ASTNode node) { - ASSERT(node->manager == this); - if (node == first) { - first.reset(); 
- last.reset(); - return; - } - - last = node->previous; - last->next.reset(); - node->previous.reset(); - - ASTNode current = std::move(node); - while (current) { - current->manager = nullptr; - current->parent.reset(); - current = current->next; - } -} - -void ASTZipper::DetachSegment(const ASTNode start, const ASTNode end) { - ASSERT(start->manager == this && end->manager == this); - if (start == end) { - DetachSingle(start); - return; - } - const ASTNode prev = start->previous; - const ASTNode post = end->next; - if (!prev) { - first = post; - } else { - prev->next = post; - } - if (!post) { - last = prev; - } else { - post->previous = prev; - } - start->previous.reset(); - end->next.reset(); - ASTNode current = start; - bool found = false; - while (current) { - current->manager = nullptr; - current->parent.reset(); - found |= current == end; - current = current->next; - } - ASSERT(found); -} - -void ASTZipper::DetachSingle(const ASTNode node) { - ASSERT(node->manager == this); - const ASTNode prev = node->previous; - const ASTNode post = node->next; - node->previous.reset(); - node->next.reset(); - if (!prev) { - first = post; - } else { - prev->next = post; - } - if (!post) { - last = prev; - } else { - post->previous = prev; - } - - node->manager = nullptr; - node->parent.reset(); -} - -void ASTZipper::Remove(const ASTNode node) { - ASSERT(node->manager == this); - const ASTNode next = node->next; - const ASTNode previous = node->previous; - if (previous) { - previous->next = next; - } - if (next) { - next->previous = previous; - } - node->parent.reset(); - node->manager = nullptr; - if (node == last) { - last = previous; - } - if (node == first) { - first = next; - } -} - -class ExprPrinter final { -public: - void operator()(const ExprAnd& expr) { - inner += "( "; - std::visit(*this, *expr.operand1); - inner += " && "; - std::visit(*this, *expr.operand2); - inner += ')'; - } - - void operator()(const ExprOr& expr) { - inner += "( "; - std::visit(*this, *expr.operand1); - inner += " || "; - std::visit(*this, *expr.operand2); - inner += ')'; - } - - void operator()(const ExprNot& expr) { - inner += "!"; - std::visit(*this, *expr.operand1); - } - - void operator()(const ExprPredicate& expr) { - inner += fmt::format("P{}", expr.predicate); - } - - void operator()(const ExprCondCode& expr) { - inner += fmt::format("CC{}", expr.cc); - } - - void operator()(const ExprVar& expr) { - inner += fmt::format("V{}", expr.var_index); - } - - void operator()(const ExprBoolean& expr) { - inner += expr.value ? 
"true" : "false"; - } - - void operator()(const ExprGprEqual& expr) { - inner += fmt::format("(gpr_{} == {})", expr.gpr, expr.value); - } - - const std::string& GetResult() const { - return inner; - } - -private: - std::string inner; -}; - -class ASTPrinter { -public: - void operator()(const ASTProgram& ast) { - scope++; - inner += "program {\n"; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - inner += "}\n"; - scope--; - } - - void operator()(const ASTIfThen& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}if ({}) {{\n", Indent(), expr_parser.GetResult()); - scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - scope--; - inner += fmt::format("{}}}\n", Indent()); - } - - void operator()(const ASTIfElse& ast) { - inner += Indent(); - inner += "else {\n"; - - scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - scope--; - - inner += Indent(); - inner += "}\n"; - } - - void operator()(const ASTBlockEncoded& ast) { - inner += fmt::format("{}Block({}, {});\n", Indent(), ast.start, ast.end); - } - - void operator()([[maybe_unused]] const ASTBlockDecoded& ast) { - inner += Indent(); - inner += "Block;\n"; - } - - void operator()(const ASTVarSet& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}V{} := {};\n", Indent(), ast.index, expr_parser.GetResult()); - } - - void operator()(const ASTLabel& ast) { - inner += fmt::format("Label_{}:\n", ast.index); - } - - void operator()(const ASTGoto& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += - fmt::format("{}({}) -> goto Label_{};\n", Indent(), expr_parser.GetResult(), ast.label); - } - - void operator()(const ASTDoWhile& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}do {{\n", Indent()); - scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - scope--; - inner += fmt::format("{}}} while ({});\n", Indent(), expr_parser.GetResult()); - } - - void operator()(const ASTReturn& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}({}) -> {};\n", Indent(), expr_parser.GetResult(), - ast.kills ? "discard" : "exit"); - } - - void operator()(const ASTBreak& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}({}) -> break;\n", Indent(), expr_parser.GetResult()); - } - - void Visit(const ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - } - - const std::string& GetResult() const { - return inner; - } - -private: - std::string_view Indent() { - if (space_segment_scope == scope) { - return space_segment; - } - - // Ensure that we don't exceed our view. 
- ASSERT(scope * 2 < spaces.size()); - - space_segment = spaces.substr(0, scope * 2); - space_segment_scope = scope; - return space_segment; - } - - std::string inner{}; - std::string_view space_segment; - - u32 scope{}; - u32 space_segment_scope{}; - - static constexpr std::string_view spaces{" "}; -}; - -std::string ASTManager::Print() const { - ASTPrinter printer{}; - printer.Visit(main_node); - return printer.GetResult(); -} - -ASTManager::ASTManager(bool do_full_decompile, bool disable_else_derivation_) - : full_decompile{do_full_decompile}, disable_else_derivation{disable_else_derivation_} {} - -ASTManager::~ASTManager() { - Clear(); -} - -void ASTManager::Init() { - main_node = ASTBase::Make<ASTProgram>(ASTNode{}); - program = std::get_if<ASTProgram>(main_node->GetInnerData()); - false_condition = MakeExpr<ExprBoolean>(false); -} - -void ASTManager::DeclareLabel(u32 address) { - const auto pair = labels_map.emplace(address, labels_count); - if (pair.second) { - labels_count++; - labels.resize(labels_count); - } -} - -void ASTManager::InsertLabel(u32 address) { - const u32 index = labels_map[address]; - const ASTNode label = ASTBase::Make<ASTLabel>(main_node, index); - labels[index] = label; - program->nodes.PushBack(label); -} - -void ASTManager::InsertGoto(Expr condition, u32 address) { - const u32 index = labels_map[address]; - const ASTNode goto_node = ASTBase::Make<ASTGoto>(main_node, std::move(condition), index); - gotos.push_back(goto_node); - program->nodes.PushBack(goto_node); -} - -void ASTManager::InsertBlock(u32 start_address, u32 end_address) { - ASTNode block = ASTBase::Make<ASTBlockEncoded>(main_node, start_address, end_address); - program->nodes.PushBack(std::move(block)); -} - -void ASTManager::InsertReturn(Expr condition, bool kills) { - ASTNode node = ASTBase::Make<ASTReturn>(main_node, std::move(condition), kills); - program->nodes.PushBack(std::move(node)); -} - -// The decompile algorithm is based on -// "Taming control flow: A structured approach to eliminating goto statements" -// by AM Erosa, LJ Hendren 1994. In general, the idea is to get gotos to be -// on the same structured level as the label which they jump to. This is done, -// through outward/inward movements and lifting. Once they are at the same -// level, you can enclose them in an "if" structure or a "do-while" structure. 
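The comment above is the only prose description of the control-flow restructuring pass being deleted, so a small stand-alone sketch of the enclosure step may help. Everything in it (the Stmt struct, EncloseBackwardGoto) is invented for illustration and only mimics the loop case: once a backward goto and its label sit at the same nesting level, the span between them becomes the body of a do-while whose condition is the goto's condition, and the goto itself disappears.

#include <cstddef>
#include <cstdio>
#include <string>
#include <vector>

struct Stmt {
    std::string text;            // block, label or goto in the flat node list
    std::string goto_condition;  // set only on goto statements
    std::vector<Stmt> body;      // filled once this node becomes a structured loop
};

// Wrap everything between a label (at label_pos) and a backward goto (at goto_pos)
// into one do-while node whose condition is the goto's condition, then drop the goto.
std::vector<Stmt> EncloseBackwardGoto(std::vector<Stmt> nodes, std::size_t label_pos,
                                      std::size_t goto_pos) {
    Stmt loop;
    loop.text = "do-while";
    loop.goto_condition = nodes[goto_pos].goto_condition;
    loop.body.assign(nodes.begin() + label_pos + 1, nodes.begin() + goto_pos);
    nodes.erase(nodes.begin() + label_pos + 1, nodes.begin() + goto_pos + 1);
    nodes.insert(nodes.begin() + label_pos + 1, std::move(loop));
    return nodes;
}

int main() {
    // Label_0: Block(0, 16); (P0) -> goto Label_0;  becomes  Label_0: do { Block(0, 16); } while (P0);
    std::vector<Stmt> program{{"Label_0", "", {}}, {"Block(0, 16)", "", {}}, {"goto Label_0", "P0", {}}};
    const std::vector<Stmt> structured = EncloseBackwardGoto(std::move(program), 0, 2);
    std::printf("%s (%s) with %zu inner node(s)\n", structured[1].text.c_str(),
                structured[1].goto_condition.c_str(), structured[1].body.size());
}

Running it prints "do-while (P0) with 1 inner node(s)", i.e. the loop body holds the single block that used to sit between the label and the goto.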
-void ASTManager::Decompile() { - auto it = gotos.begin(); - while (it != gotos.end()) { - const ASTNode goto_node = *it; - const auto label_index = goto_node->GetGotoLabel(); - if (!label_index) { - return; - } - const ASTNode label = labels[*label_index]; - if (!full_decompile) { - // We only decompile backward jumps - if (!IsBackwardsJump(goto_node, label)) { - it++; - continue; - } - } - if (IndirectlyRelated(goto_node, label)) { - while (!DirectlyRelated(goto_node, label)) { - MoveOutward(goto_node); - } - } - if (DirectlyRelated(goto_node, label)) { - u32 goto_level = goto_node->GetLevel(); - const u32 label_level = label->GetLevel(); - while (label_level < goto_level) { - MoveOutward(goto_node); - goto_level--; - } - // TODO(Blinkhawk): Implement Lifting and Inward Movements - } - if (label->GetParent() == goto_node->GetParent()) { - bool is_loop = false; - ASTNode current = goto_node->GetPrevious(); - while (current) { - if (current == label) { - is_loop = true; - break; - } - current = current->GetPrevious(); - } - - if (is_loop) { - EncloseDoWhile(goto_node, label); - } else { - EncloseIfThen(goto_node, label); - } - it = gotos.erase(it); - continue; - } - it++; - } - if (full_decompile) { - for (const ASTNode& label : labels) { - auto& manager = label->GetManager(); - manager.Remove(label); - } - labels.clear(); - } else { - auto label_it = labels.begin(); - while (label_it != labels.end()) { - bool can_remove = true; - ASTNode label = *label_it; - for (const ASTNode& goto_node : gotos) { - const auto label_index = goto_node->GetGotoLabel(); - if (!label_index) { - return; - } - ASTNode& glabel = labels[*label_index]; - if (glabel == label) { - can_remove = false; - break; - } - } - if (can_remove) { - label->MarkLabelUnused(); - } - } - } -} - -bool ASTManager::IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const { - u32 goto_level = goto_node->GetLevel(); - u32 label_level = label_node->GetLevel(); - while (goto_level > label_level) { - goto_level--; - goto_node = goto_node->GetParent(); - } - while (label_level > goto_level) { - label_level--; - label_node = label_node->GetParent(); - } - while (goto_node->GetParent() != label_node->GetParent()) { - goto_node = goto_node->GetParent(); - label_node = label_node->GetParent(); - } - ASTNode current = goto_node->GetPrevious(); - while (current) { - if (current == label_node) { - return true; - } - current = current->GetPrevious(); - } - return false; -} - -bool ASTManager::IndirectlyRelated(const ASTNode& first, const ASTNode& second) const { - return !(first->GetParent() == second->GetParent() || DirectlyRelated(first, second)); -} - -bool ASTManager::DirectlyRelated(const ASTNode& first, const ASTNode& second) const { - if (first->GetParent() == second->GetParent()) { - return false; - } - const u32 first_level = first->GetLevel(); - const u32 second_level = second->GetLevel(); - u32 min_level; - u32 max_level; - ASTNode max; - ASTNode min; - if (first_level > second_level) { - min_level = second_level; - min = second; - max_level = first_level; - max = first; - } else { - min_level = first_level; - min = first; - max_level = second_level; - max = second; - } - - while (max_level > min_level) { - max_level--; - max = max->GetParent(); - } - - return min->GetParent() == max->GetParent(); -} - -void ASTManager::ShowCurrentState(std::string_view state) const { - LOG_CRITICAL(HW_GPU, "\nState {}:\n\n{}\n", state, Print()); - SanityCheck(); -} - -void ASTManager::SanityCheck() const { - for (const auto& label : labels) { - 
if (!label->GetParent()) { - LOG_CRITICAL(HW_GPU, "Sanity Check Failed"); - } - } -} - -void ASTManager::EncloseDoWhile(ASTNode goto_node, ASTNode label) { - ASTZipper& zipper = goto_node->GetManager(); - const ASTNode loop_start = label->GetNext(); - if (loop_start == goto_node) { - zipper.Remove(goto_node); - return; - } - const ASTNode parent = label->GetParent(); - const Expr condition = goto_node->GetGotoCondition(); - zipper.DetachSegment(loop_start, goto_node); - const ASTNode do_while_node = ASTBase::Make<ASTDoWhile>(parent, condition); - ASTZipper* sub_zipper = do_while_node->GetSubNodes(); - sub_zipper->Init(loop_start, do_while_node); - zipper.InsertAfter(do_while_node, label); - sub_zipper->Remove(goto_node); -} - -void ASTManager::EncloseIfThen(ASTNode goto_node, ASTNode label) { - ASTZipper& zipper = goto_node->GetManager(); - const ASTNode if_end = label->GetPrevious(); - if (if_end == goto_node) { - zipper.Remove(goto_node); - return; - } - const ASTNode prev = goto_node->GetPrevious(); - const Expr condition = goto_node->GetGotoCondition(); - bool do_else = false; - if (!disable_else_derivation && prev->IsIfThen()) { - const Expr if_condition = prev->GetIfCondition(); - do_else = ExprAreEqual(if_condition, condition); - } - const ASTNode parent = label->GetParent(); - zipper.DetachSegment(goto_node, if_end); - ASTNode if_node; - if (do_else) { - if_node = ASTBase::Make<ASTIfElse>(parent); - } else { - Expr neg_condition = MakeExprNot(condition); - if_node = ASTBase::Make<ASTIfThen>(parent, neg_condition); - } - ASTZipper* sub_zipper = if_node->GetSubNodes(); - sub_zipper->Init(goto_node, if_node); - zipper.InsertAfter(if_node, prev); - sub_zipper->Remove(goto_node); -} - -void ASTManager::MoveOutward(ASTNode goto_node) { - ASTZipper& zipper = goto_node->GetManager(); - const ASTNode parent = goto_node->GetParent(); - ASTZipper& zipper2 = parent->GetManager(); - const ASTNode grandpa = parent->GetParent(); - const bool is_loop = parent->IsLoop(); - const bool is_else = parent->IsIfElse(); - const bool is_if = parent->IsIfThen(); - - const ASTNode prev = goto_node->GetPrevious(); - const ASTNode post = goto_node->GetNext(); - - const Expr condition = goto_node->GetGotoCondition(); - zipper.DetachSingle(goto_node); - if (is_loop) { - const u32 var_index = NewVariable(); - const Expr var_condition = MakeExpr<ExprVar>(var_index); - const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition); - const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(parent, var_index, false_condition); - zipper2.InsertBefore(var_node_init, parent); - zipper.InsertAfter(var_node, prev); - goto_node->SetGotoCondition(var_condition); - const ASTNode break_node = ASTBase::Make<ASTBreak>(parent, var_condition); - zipper.InsertAfter(break_node, var_node); - } else if (is_if || is_else) { - const u32 var_index = NewVariable(); - const Expr var_condition = MakeExpr<ExprVar>(var_index); - const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition); - const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(parent, var_index, false_condition); - if (is_if) { - zipper2.InsertBefore(var_node_init, parent); - } else { - zipper2.InsertBefore(var_node_init, parent->GetPrevious()); - } - zipper.InsertAfter(var_node, prev); - goto_node->SetGotoCondition(var_condition); - if (post) { - zipper.DetachTail(post); - const ASTNode if_node = ASTBase::Make<ASTIfThen>(parent, MakeExprNot(var_condition)); - ASTZipper* sub_zipper = if_node->GetSubNodes(); - sub_zipper->Init(post, 
if_node); - zipper.InsertAfter(if_node, var_node); - } - } else { - UNREACHABLE(); - } - const ASTNode next = parent->GetNext(); - if (is_if && next && next->IsIfElse()) { - zipper2.InsertAfter(goto_node, next); - goto_node->SetParent(grandpa); - return; - } - zipper2.InsertAfter(goto_node, parent); - goto_node->SetParent(grandpa); -} - -class ASTClearer { -public: - ASTClearer() = default; - - void operator()(const ASTProgram& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(const ASTIfThen& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(const ASTIfElse& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {} - - void operator()(ASTBlockDecoded& ast) { - ast.nodes.clear(); - } - - void operator()([[maybe_unused]] const ASTVarSet& ast) {} - - void operator()([[maybe_unused]] const ASTLabel& ast) {} - - void operator()([[maybe_unused]] const ASTGoto& ast) {} - - void operator()(const ASTDoWhile& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()([[maybe_unused]] const ASTReturn& ast) {} - - void operator()([[maybe_unused]] const ASTBreak& ast) {} - - void Visit(const ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - node->Clear(); - } -}; - -void ASTManager::Clear() { - if (!main_node) { - return; - } - ASTClearer clearer{}; - clearer.Visit(main_node); - main_node.reset(); - program = nullptr; - labels_map.clear(); - labels.clear(); - gotos.clear(); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h deleted file mode 100644 index dc49b369e..000000000 --- a/src/video_core/shader/ast.h +++ /dev/null @@ -1,398 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
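MoveOutward above handles the case where a goto is nested deeper than its label by hoisting it one level at a time: inside a loop it records the goto condition in a fresh variable, breaks out, and re-emits the goto (now guarded by that variable) after the loop. A hypothetical, hand-written equivalence check of that loop case follows; the two function names are invented, and the re-emitted goto is omitted because nothing follows the loop in this toy example.

#include <cassert>

// A conditional goto that escapes a loop...
int WithGoto(int iterations, int escape_at) {
    int executed = 0;
    for (int i = 0; i < iterations; ++i) {
        if (i == escape_at) {
            goto after_loop;  // unstructured jump out of the loop
        }
        ++executed;
    }
after_loop:
    return executed;
}

// ...behaves the same as the structured form MoveOutward produces for loops.
int WithVarAndBreak(int iterations, int escape_at) {
    int executed = 0;
    bool v = false;  // the fresh variable, initialised to false before the loop (ASTVarSet)
    for (int i = 0; i < iterations; ++i) {
        v = (i == escape_at);  // record the goto condition inside the loop (ASTVarSet)
        if (v) {
            break;             // the break that replaces the goto (ASTBreak)
        }
        ++executed;
    }
    // Here the pass would re-emit the goto guarded by v; nothing follows the loop,
    // so this sketch simply returns.
    return executed;
}

int main() {
    for (int escape = -1; escape < 6; ++escape) {
        assert(WithGoto(4, escape) == WithVarAndBreak(4, escape));
    }
    return 0;
}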
- -#pragma once - -#include <functional> -#include <list> -#include <memory> -#include <optional> -#include <string> -#include <unordered_map> -#include <vector> - -#include "video_core/shader/expr.h" -#include "video_core/shader/node.h" - -namespace VideoCommon::Shader { - -class ASTBase; -class ASTBlockDecoded; -class ASTBlockEncoded; -class ASTBreak; -class ASTDoWhile; -class ASTGoto; -class ASTIfElse; -class ASTIfThen; -class ASTLabel; -class ASTProgram; -class ASTReturn; -class ASTVarSet; - -using ASTData = std::variant<ASTProgram, ASTIfThen, ASTIfElse, ASTBlockEncoded, ASTBlockDecoded, - ASTVarSet, ASTGoto, ASTLabel, ASTDoWhile, ASTReturn, ASTBreak>; - -using ASTNode = std::shared_ptr<ASTBase>; - -enum class ASTZipperType : u32 { - Program, - IfThen, - IfElse, - Loop, -}; - -class ASTZipper final { -public: - explicit ASTZipper(); - - void Init(ASTNode first, ASTNode parent); - - ASTNode GetFirst() const { - return first; - } - - ASTNode GetLast() const { - return last; - } - - void PushBack(ASTNode new_node); - void PushFront(ASTNode new_node); - void InsertAfter(ASTNode new_node, ASTNode at_node); - void InsertBefore(ASTNode new_node, ASTNode at_node); - void DetachTail(ASTNode node); - void DetachSingle(ASTNode node); - void DetachSegment(ASTNode start, ASTNode end); - void Remove(ASTNode node); - - ASTNode first; - ASTNode last; -}; - -class ASTProgram { -public: - ASTZipper nodes{}; -}; - -class ASTIfThen { -public: - explicit ASTIfThen(Expr condition_) : condition{std::move(condition_)} {} - Expr condition; - ASTZipper nodes{}; -}; - -class ASTIfElse { -public: - ASTZipper nodes{}; -}; - -class ASTBlockEncoded { -public: - explicit ASTBlockEncoded(u32 start_, u32 _) : start{start_}, end{_} {} - u32 start; - u32 end; -}; - -class ASTBlockDecoded { -public: - explicit ASTBlockDecoded(NodeBlock&& new_nodes_) : nodes(std::move(new_nodes_)) {} - NodeBlock nodes; -}; - -class ASTVarSet { -public: - explicit ASTVarSet(u32 index_, Expr condition_) - : index{index_}, condition{std::move(condition_)} {} - - u32 index; - Expr condition; -}; - -class ASTLabel { -public: - explicit ASTLabel(u32 index_) : index{index_} {} - u32 index; - bool unused{}; -}; - -class ASTGoto { -public: - explicit ASTGoto(Expr condition_, u32 label_) - : condition{std::move(condition_)}, label{label_} {} - - Expr condition; - u32 label; -}; - -class ASTDoWhile { -public: - explicit ASTDoWhile(Expr condition_) : condition{std::move(condition_)} {} - Expr condition; - ASTZipper nodes{}; -}; - -class ASTReturn { -public: - explicit ASTReturn(Expr condition_, bool kills_) - : condition{std::move(condition_)}, kills{kills_} {} - - Expr condition; - bool kills; -}; - -class ASTBreak { -public: - explicit ASTBreak(Expr condition_) : condition{std::move(condition_)} {} - Expr condition; -}; - -class ASTBase { -public: - explicit ASTBase(ASTNode parent_, ASTData data_) - : data{std::move(data_)}, parent{std::move(parent_)} {} - - template <class U, class... Args> - static ASTNode Make(ASTNode parent, Args&&... 
args) { - return std::make_shared<ASTBase>(std::move(parent), - ASTData(U(std::forward<Args>(args)...))); - } - - void SetParent(ASTNode new_parent) { - parent = std::move(new_parent); - } - - ASTNode& GetParent() { - return parent; - } - - const ASTNode& GetParent() const { - return parent; - } - - u32 GetLevel() const { - u32 level = 0; - auto next_parent = parent; - while (next_parent) { - next_parent = next_parent->GetParent(); - level++; - } - return level; - } - - ASTData* GetInnerData() { - return &data; - } - - const ASTData* GetInnerData() const { - return &data; - } - - ASTNode GetNext() const { - return next; - } - - ASTNode GetPrevious() const { - return previous; - } - - ASTZipper& GetManager() { - return *manager; - } - - const ASTZipper& GetManager() const { - return *manager; - } - - std::optional<u32> GetGotoLabel() const { - if (const auto* inner = std::get_if<ASTGoto>(&data)) { - return {inner->label}; - } - return std::nullopt; - } - - Expr GetGotoCondition() const { - if (const auto* inner = std::get_if<ASTGoto>(&data)) { - return inner->condition; - } - return nullptr; - } - - void MarkLabelUnused() { - if (auto* inner = std::get_if<ASTLabel>(&data)) { - inner->unused = true; - } - } - - bool IsLabelUnused() const { - if (const auto* inner = std::get_if<ASTLabel>(&data)) { - return inner->unused; - } - return true; - } - - std::optional<u32> GetLabelIndex() const { - if (const auto* inner = std::get_if<ASTLabel>(&data)) { - return {inner->index}; - } - return std::nullopt; - } - - Expr GetIfCondition() const { - if (const auto* inner = std::get_if<ASTIfThen>(&data)) { - return inner->condition; - } - return nullptr; - } - - void SetGotoCondition(Expr new_condition) { - if (auto* inner = std::get_if<ASTGoto>(&data)) { - inner->condition = std::move(new_condition); - } - } - - bool IsIfThen() const { - return std::holds_alternative<ASTIfThen>(data); - } - - bool IsIfElse() const { - return std::holds_alternative<ASTIfElse>(data); - } - - bool IsBlockEncoded() const { - return std::holds_alternative<ASTBlockEncoded>(data); - } - - void TransformBlockEncoded(NodeBlock&& nodes) { - data = ASTBlockDecoded(std::move(nodes)); - } - - bool IsLoop() const { - return std::holds_alternative<ASTDoWhile>(data); - } - - ASTZipper* GetSubNodes() { - if (std::holds_alternative<ASTProgram>(data)) { - return &std::get_if<ASTProgram>(&data)->nodes; - } - if (std::holds_alternative<ASTIfThen>(data)) { - return &std::get_if<ASTIfThen>(&data)->nodes; - } - if (std::holds_alternative<ASTIfElse>(data)) { - return &std::get_if<ASTIfElse>(&data)->nodes; - } - if (std::holds_alternative<ASTDoWhile>(data)) { - return &std::get_if<ASTDoWhile>(&data)->nodes; - } - return nullptr; - } - - void Clear() { - next.reset(); - previous.reset(); - parent.reset(); - manager = nullptr; - } - -private: - friend class ASTZipper; - - ASTData data; - ASTNode parent; - ASTNode next; - ASTNode previous; - ASTZipper* manager{}; -}; - -class ASTManager final { -public: - explicit ASTManager(bool do_full_decompile, bool disable_else_derivation_); - ~ASTManager(); - - ASTManager(const ASTManager& o) = delete; - ASTManager& operator=(const ASTManager& other) = delete; - - ASTManager(ASTManager&& other) noexcept = default; - ASTManager& operator=(ASTManager&& other) noexcept = default; - - void Init(); - - void DeclareLabel(u32 address); - - void InsertLabel(u32 address); - - void InsertGoto(Expr condition, u32 address); - - void InsertBlock(u32 start_address, u32 end_address); - - void InsertReturn(Expr condition, bool 
kills); - - std::string Print() const; - - void Decompile(); - - void ShowCurrentState(std::string_view state) const; - - void SanityCheck() const; - - void Clear(); - - bool IsFullyDecompiled() const { - if (full_decompile) { - return gotos.empty(); - } - - for (ASTNode goto_node : gotos) { - auto label_index = goto_node->GetGotoLabel(); - if (!label_index) { - return false; - } - ASTNode glabel = labels[*label_index]; - if (IsBackwardsJump(goto_node, glabel)) { - return false; - } - } - return true; - } - - ASTNode GetProgram() const { - return main_node; - } - - u32 GetVariables() const { - return variables; - } - - const std::vector<ASTNode>& GetLabels() const { - return labels; - } - -private: - bool IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const; - - bool IndirectlyRelated(const ASTNode& first, const ASTNode& second) const; - - bool DirectlyRelated(const ASTNode& first, const ASTNode& second) const; - - void EncloseDoWhile(ASTNode goto_node, ASTNode label); - - void EncloseIfThen(ASTNode goto_node, ASTNode label); - - void MoveOutward(ASTNode goto_node); - - u32 NewVariable() { - return variables++; - } - - bool full_decompile{}; - bool disable_else_derivation{}; - std::unordered_map<u32, u32> labels_map{}; - u32 labels_count{}; - std::vector<ASTNode> labels{}; - std::list<ASTNode> gotos{}; - u32 variables{}; - ASTProgram* program{}; - ASTNode main_node{}; - Expr false_condition{}; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp deleted file mode 100644 index 02adcf9c7..000000000 --- a/src/video_core/shader/async_shaders.cpp +++ /dev/null @@ -1,234 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
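One detail worth noting about the AST being deleted above: ASTBase nodes keep shared_ptr links to next, previous and parent, so siblings form reference cycles. That appears to be why ASTManager::Clear and the ASTClearer visitor walk the whole tree and reset every link before dropping main_node. A tiny stand-alone sketch of the leak those resets avoid (the Node struct is invented for this illustration):

#include <cstdio>
#include <memory>

// Two sibling nodes that point at each other, like ASTBase::next / ASTBase::previous.
struct Node {
    std::shared_ptr<Node> next;
    std::shared_ptr<Node> previous;
    ~Node() { std::puts("node destroyed"); }
};

int main() {
    auto a = std::make_shared<Node>();
    auto b = std::make_shared<Node>();
    a->next = b;
    b->previous = a;  // reference cycle: a <-> b

    // Without these two resets (the equivalent of ASTBase::Clear), dropping the
    // external handles below would leave both nodes alive forever.
    a->next.reset();
    b->previous.reset();

    a.reset();  // prints "node destroyed"
    b.reset();  // prints "node destroyed"
}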
- -#include <condition_variable> -#include <mutex> -#include <thread> -#include <vector> -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_base.h" -#include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/shader/async_shaders.h" - -namespace VideoCommon::Shader { - -AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window_) : emu_window(emu_window_) {} - -AsyncShaders::~AsyncShaders() { - KillWorkers(); -} - -void AsyncShaders::AllocateWorkers() { - // Use at least one thread - u32 num_workers = 1; - - // Deduce how many more threads we can use - const u32 thread_count = std::thread::hardware_concurrency(); - if (thread_count >= 8) { - // Increase async workers by 1 for every 2 threads >= 8 - num_workers += 1 + (thread_count - 8) / 2; - } - - // If we already have workers queued, ignore - if (num_workers == worker_threads.size()) { - return; - } - - // If workers already exist, clear them - if (!worker_threads.empty()) { - FreeWorkers(); - } - - // Create workers - for (std::size_t i = 0; i < num_workers; i++) { - context_list.push_back(emu_window.CreateSharedContext()); - worker_threads.emplace_back(&AsyncShaders::ShaderCompilerThread, this, - context_list[i].get()); - } -} - -void AsyncShaders::FreeWorkers() { - // Mark all threads to quit - is_thread_exiting.store(true); - cv.notify_all(); - for (auto& thread : worker_threads) { - thread.join(); - } - // Clear our shared contexts - context_list.clear(); - - // Clear our worker threads - worker_threads.clear(); -} - -void AsyncShaders::KillWorkers() { - is_thread_exiting.store(true); - cv.notify_all(); - for (auto& thread : worker_threads) { - thread.detach(); - } - // Clear our shared contexts - context_list.clear(); - - // Clear our worker threads - worker_threads.clear(); -} - -bool AsyncShaders::HasWorkQueued() const { - return !pending_queue.empty(); -} - -bool AsyncShaders::HasCompletedWork() const { - std::shared_lock lock{completed_mutex}; - return !finished_work.empty(); -} - -bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const { - const auto& regs = gpu.Maxwell3D().regs; - - // If something is using depth, we can assume that games are not rendering anything which will - // be used one time. - if (regs.zeta_enable) { - return true; - } - - // If games are using a small index count, we can assume these are full screen quads. Usually - // these shaders are only used once for building textures so we can assume they can't be built - // async - if (regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6) { - return false; - } - - return true; -} - -std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() { - std::vector<Result> results; - { - std::unique_lock lock{completed_mutex}; - results = std::move(finished_work); - finished_work.clear(); - } - return results; -} - -void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device, - Tegra::Engines::ShaderType shader_type, u64 uid, - std::vector<u64> code, std::vector<u64> code_b, - u32 main_offset, CompilerSettings compiler_settings, - const Registry& registry, VAddr cpu_addr) { - std::unique_lock lock(queue_mutex); - pending_queue.push({ - .backend = device.UseAssemblyShaders() ? 
Backend::GLASM : Backend::OpenGL, - .device = &device, - .shader_type = shader_type, - .uid = uid, - .code = std::move(code), - .code_b = std::move(code_b), - .main_offset = main_offset, - .compiler_settings = compiler_settings, - .registry = registry, - .cpu_address = cpu_addr, - .pp_cache = nullptr, - .vk_device = nullptr, - .scheduler = nullptr, - .descriptor_pool = nullptr, - .update_descriptor_queue = nullptr, - .bindings{}, - .program{}, - .key{}, - .num_color_buffers = 0, - }); - cv.notify_one(); -} - -void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, - const Vulkan::Device& device, Vulkan::VKScheduler& scheduler, - Vulkan::VKDescriptorPool& descriptor_pool, - Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, - std::vector<VkDescriptorSetLayoutBinding> bindings, - Vulkan::SPIRVProgram program, - Vulkan::GraphicsPipelineCacheKey key, u32 num_color_buffers) { - std::unique_lock lock(queue_mutex); - pending_queue.push({ - .backend = Backend::Vulkan, - .device = nullptr, - .shader_type{}, - .uid = 0, - .code{}, - .code_b{}, - .main_offset = 0, - .compiler_settings{}, - .registry{}, - .cpu_address = 0, - .pp_cache = pp_cache, - .vk_device = &device, - .scheduler = &scheduler, - .descriptor_pool = &descriptor_pool, - .update_descriptor_queue = &update_descriptor_queue, - .bindings = std::move(bindings), - .program = std::move(program), - .key = key, - .num_color_buffers = num_color_buffers, - }); - cv.notify_one(); -} - -void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) { - while (!is_thread_exiting.load(std::memory_order_relaxed)) { - std::unique_lock lock{queue_mutex}; - cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; }); - if (is_thread_exiting) { - return; - } - - // Partial lock to allow all threads to read at the same time - if (!HasWorkQueued()) { - continue; - } - // Another thread beat us, just unlock and wait for the next load - if (pending_queue.empty()) { - continue; - } - - // Pull work from queue - WorkerParams work = std::move(pending_queue.front()); - pending_queue.pop(); - lock.unlock(); - - if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) { - const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry); - const auto scope = context->Acquire(); - auto program = - OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry); - Result result{}; - result.backend = work.backend; - result.cpu_address = work.cpu_address; - result.uid = work.uid; - result.code = std::move(work.code); - result.code_b = std::move(work.code_b); - result.shader_type = work.shader_type; - - if (work.backend == Backend::OpenGL) { - result.program.opengl = std::move(program->source_program); - } else if (work.backend == Backend::GLASM) { - result.program.glasm = std::move(program->assembly_program); - } - - { - std::unique_lock complete_lock(completed_mutex); - finished_work.push_back(std::move(result)); - } - } else if (work.backend == Backend::Vulkan) { - auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>( - *work.vk_device, *work.scheduler, *work.descriptor_pool, - *work.update_descriptor_queue, work.key, work.bindings, work.program, - work.num_color_buffers); - - work.pp_cache->EmplacePipeline(std::move(pipeline)); - } - } -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h deleted file mode 100644 index 7fdff6e56..000000000 --- 
a/src/video_core/shader/async_shaders.h +++ /dev/null @@ -1,138 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <condition_variable> -#include <memory> -#include <shared_mutex> -#include <thread> - -#include <glad/glad.h> - -#include "common/common_types.h" -#include "video_core/renderer_opengl/gl_device.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/renderer_vulkan/vk_pipeline_cache.h" -#include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/vulkan_common/vulkan_device.h" - -namespace Core::Frontend { -class EmuWindow; -class GraphicsContext; -} // namespace Core::Frontend - -namespace Tegra { -class GPU; -} - -namespace Vulkan { -class VKPipelineCache; -} - -namespace VideoCommon::Shader { - -class AsyncShaders { -public: - enum class Backend { - OpenGL, - GLASM, - Vulkan, - }; - - struct ResultPrograms { - OpenGL::OGLProgram opengl; - OpenGL::OGLAssemblyProgram glasm; - }; - - struct Result { - u64 uid; - VAddr cpu_address; - Backend backend; - ResultPrograms program; - std::vector<u64> code; - std::vector<u64> code_b; - Tegra::Engines::ShaderType shader_type; - }; - - explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window_); - ~AsyncShaders(); - - /// Start up shader worker threads - void AllocateWorkers(); - - /// Clear the shader queue and kill all worker threads - void FreeWorkers(); - - // Force end all threads - void KillWorkers(); - - /// Check to see if any shaders have actually been compiled - [[nodiscard]] bool HasCompletedWork() const; - - /// Deduce if a shader can be build on another thread of MUST be built in sync. We cannot build - /// every shader async as some shaders are only built and executed once. 
We try to "guess" which - /// shader would be used only once - [[nodiscard]] bool IsShaderAsync(const Tegra::GPU& gpu) const; - - /// Pulls completed compiled shaders - [[nodiscard]] std::vector<Result> GetCompletedWork(); - - void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type, - u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset, - CompilerSettings compiler_settings, const Registry& registry, - VAddr cpu_addr); - - void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::Device& device, - Vulkan::VKScheduler& scheduler, - Vulkan::VKDescriptorPool& descriptor_pool, - Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, - std::vector<VkDescriptorSetLayoutBinding> bindings, - Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key, - u32 num_color_buffers); - -private: - void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); - - /// Check our worker queue to see if we have any work queued already - [[nodiscard]] bool HasWorkQueued() const; - - struct WorkerParams { - Backend backend; - // For OGL - const OpenGL::Device* device; - Tegra::Engines::ShaderType shader_type; - u64 uid; - std::vector<u64> code; - std::vector<u64> code_b; - u32 main_offset; - CompilerSettings compiler_settings; - std::optional<Registry> registry; - VAddr cpu_address; - - // For Vulkan - Vulkan::VKPipelineCache* pp_cache; - const Vulkan::Device* vk_device; - Vulkan::VKScheduler* scheduler; - Vulkan::VKDescriptorPool* descriptor_pool; - Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue; - std::vector<VkDescriptorSetLayoutBinding> bindings; - Vulkan::SPIRVProgram program; - Vulkan::GraphicsPipelineCacheKey key; - u32 num_color_buffers; - }; - - std::condition_variable cv; - mutable std::mutex queue_mutex; - mutable std::shared_mutex completed_mutex; - std::atomic<bool> is_thread_exiting{}; - std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list; - std::vector<std::thread> worker_threads; - std::queue<WorkerParams> pending_queue; - std::vector<Result> finished_work; - Core::Frontend::EmuWindow& emu_window; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/compiler_settings.cpp b/src/video_core/shader/compiler_settings.cpp deleted file mode 100644 index cddcbd4f0..000000000 --- a/src/video_core/shader/compiler_settings.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "video_core/shader/compiler_settings.h" - -namespace VideoCommon::Shader { - -std::string CompileDepthAsString(const CompileDepth cd) { - switch (cd) { - case CompileDepth::BruteForce: - return "Brute Force Compile"; - case CompileDepth::FlowStack: - return "Simple Flow Stack Mode"; - case CompileDepth::NoFlowStack: - return "Remove Flow Stack"; - case CompileDepth::DecompileBackwards: - return "Decompile Backward Jumps"; - case CompileDepth::FullDecompile: - return "Full Decompilation"; - default: - return "Unknown Compiler Process"; - } -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/compiler_settings.h b/src/video_core/shader/compiler_settings.h deleted file mode 100644 index 916018c01..000000000 --- a/src/video_core/shader/compiler_settings.h +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include "video_core/engines/shader_bytecode.h" - -namespace VideoCommon::Shader { - -enum class CompileDepth : u32 { - BruteForce = 0, - FlowStack = 1, - NoFlowStack = 2, - DecompileBackwards = 3, - FullDecompile = 4, -}; - -std::string CompileDepthAsString(CompileDepth cd); - -struct CompilerSettings { - CompileDepth depth{CompileDepth::NoFlowStack}; - bool disable_else_derivation{true}; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp deleted file mode 100644 index 43d965f2f..000000000 --- a/src/video_core/shader/control_flow.cpp +++ /dev/null @@ -1,751 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <list> -#include <map> -#include <set> -#include <stack> -#include <unordered_map> -#include <vector> - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/control_flow.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -namespace { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -constexpr s32 unassigned_branch = -2; - -struct Query { - u32 address{}; - std::stack<u32> ssy_stack{}; - std::stack<u32> pbk_stack{}; -}; - -struct BlockStack { - BlockStack() = default; - explicit BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {} - std::stack<u32> ssy_stack{}; - std::stack<u32> pbk_stack{}; -}; - -template <typename T, typename... Args> -BlockBranchInfo MakeBranchInfo(Args&&... args) { - static_assert(std::is_convertible_v<T, BranchData>); - return std::make_shared<BranchData>(T(std::forward<Args>(args)...)); -} - -bool BlockBranchIsIgnored(BlockBranchInfo first) { - bool ignore = false; - if (std::holds_alternative<SingleBranch>(*first)) { - const auto branch = std::get_if<SingleBranch>(first.get()); - ignore = branch->ignore; - } - return ignore; -} - -struct BlockInfo { - u32 start{}; - u32 end{}; - bool visited{}; - BlockBranchInfo branch{}; - - bool IsInside(const u32 address) const { - return start <= address && address <= end; - } -}; - -struct CFGRebuildState { - explicit CFGRebuildState(const ProgramCode& program_code_, u32 start_, Registry& registry_) - : program_code{program_code_}, registry{registry_}, start{start_} {} - - const ProgramCode& program_code; - Registry& registry; - u32 start{}; - std::vector<BlockInfo> block_info; - std::list<u32> inspect_queries; - std::list<Query> queries; - std::unordered_map<u32, u32> registered; - std::set<u32> labels; - std::map<u32, u32> ssy_labels; - std::map<u32, u32> pbk_labels; - std::unordered_map<u32, BlockStack> stacks; - ASTManager* manager{}; -}; - -enum class BlockCollision : u32 { None, Found, Inside }; - -std::pair<BlockCollision, u32> TryGetBlock(CFGRebuildState& state, u32 address) { - const auto& blocks = state.block_info; - for (u32 index = 0; index < blocks.size(); index++) { - if (blocks[index].start == address) { - return {BlockCollision::Found, index}; - } - if (blocks[index].IsInside(address)) { - return {BlockCollision::Inside, index}; - } - } - return {BlockCollision::None, 0xFFFFFFFF}; -} - -struct ParseInfo { - BlockBranchInfo branch_info{}; - u32 end_address{}; -}; - -BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) { - auto& it = 
state.block_info.emplace_back(); - it.start = start; - it.end = end; - const u32 index = static_cast<u32>(state.block_info.size() - 1); - state.registered.insert({start, index}); - return it; -} - -Pred GetPredicate(u32 index, bool negated) { - return static_cast<Pred>(static_cast<u64>(index) + (negated ? 8ULL : 0ULL)); -} - -enum class ParseResult : u32 { - ControlCaught, - BlockEnd, - AbnormalFlow, -}; - -struct BranchIndirectInfo { - u32 buffer{}; - u32 offset{}; - u32 entries{}; - s32 relative_position{}; -}; - -struct BufferInfo { - u32 index; - u32 offset; -}; - -std::optional<std::pair<s32, u64>> GetBRXInfo(const CFGRebuildState& state, u32& pos) { - const Instruction instr = state.program_code[pos]; - const auto opcode = OpCode::Decode(instr); - if (opcode->get().GetId() != OpCode::Id::BRX) { - return std::nullopt; - } - if (instr.brx.constant_buffer != 0) { - return std::nullopt; - } - --pos; - return std::make_pair(instr.brx.GetBranchExtend(), instr.gpr8.Value()); -} - -template <typename Result, typename TestCallable, typename PackCallable> -// requires std::predicate<TestCallable, Instruction, const OpCode::Matcher&> -// requires std::invocable<PackCallable, Instruction, const OpCode::Matcher&> -std::optional<Result> TrackInstruction(const CFGRebuildState& state, u32& pos, TestCallable test, - PackCallable pack) { - for (; pos >= state.start; --pos) { - if (IsSchedInstruction(pos, state.start)) { - continue; - } - const Instruction instr = state.program_code[pos]; - const auto opcode = OpCode::Decode(instr); - if (!opcode) { - continue; - } - if (test(instr, opcode->get())) { - --pos; - return std::make_optional(pack(instr, opcode->get())); - } - } - return std::nullopt; -} - -std::optional<std::pair<BufferInfo, u64>> TrackLDC(const CFGRebuildState& state, u32& pos, - u64 brx_tracked_register) { - return TrackInstruction<std::pair<BufferInfo, u64>>( - state, pos, - [brx_tracked_register](auto instr, const auto& opcode) { - return opcode.GetId() == OpCode::Id::LD_C && - instr.gpr0.Value() == brx_tracked_register && - instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single; - }, - [](auto instr, const auto& opcode) { - const BufferInfo info = {static_cast<u32>(instr.cbuf36.index.Value()), - static_cast<u32>(instr.cbuf36.GetOffset())}; - return std::make_pair(info, instr.gpr8.Value()); - }); -} - -std::optional<u64> TrackSHLRegister(const CFGRebuildState& state, u32& pos, - u64 ldc_tracked_register) { - return TrackInstruction<u64>( - state, pos, - [ldc_tracked_register](auto instr, const auto& opcode) { - return opcode.GetId() == OpCode::Id::SHL_IMM && - instr.gpr0.Value() == ldc_tracked_register; - }, - [](auto instr, const auto&) { return instr.gpr8.Value(); }); -} - -std::optional<u32> TrackIMNMXValue(const CFGRebuildState& state, u32& pos, - u64 shl_tracked_register) { - return TrackInstruction<u32>( - state, pos, - [shl_tracked_register](auto instr, const auto& opcode) { - return opcode.GetId() == OpCode::Id::IMNMX_IMM && - instr.gpr0.Value() == shl_tracked_register; - }, - [](auto instr, const auto&) { - return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1); - }); -} - -std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) { - const auto brx_info = GetBRXInfo(state, pos); - if (!brx_info) { - return std::nullopt; - } - const auto [relative_position, brx_tracked_register] = *brx_info; - - const auto ldc_info = TrackLDC(state, pos, brx_tracked_register); - if (!ldc_info) { - return std::nullopt; - } - const auto 
[buffer_info, ldc_tracked_register] = *ldc_info; - - const auto shl_tracked_register = TrackSHLRegister(state, pos, ldc_tracked_register); - if (!shl_tracked_register) { - return std::nullopt; - } - - const auto entries = TrackIMNMXValue(state, pos, *shl_tracked_register); - if (!entries) { - return std::nullopt; - } - - return BranchIndirectInfo{buffer_info.index, buffer_info.offset, *entries, relative_position}; -} - -std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) { - u32 offset = static_cast<u32>(address); - const u32 end_address = static_cast<u32>(state.program_code.size()); - ParseInfo parse_info{}; - SingleBranch single_branch{}; - - const auto insert_label = [](CFGRebuildState& rebuild_state, u32 label_address) { - const auto pair = rebuild_state.labels.emplace(label_address); - if (pair.second) { - rebuild_state.inspect_queries.push_back(label_address); - } - }; - - while (true) { - if (offset >= end_address) { - // ASSERT_OR_EXECUTE can't be used, as it ignores the break - ASSERT_MSG(false, "Shader passed the current limit!"); - - single_branch.address = exit_branch; - single_branch.ignore = false; - break; - } - if (state.registered.contains(offset)) { - single_branch.address = offset; - single_branch.ignore = true; - break; - } - if (IsSchedInstruction(offset, state.start)) { - offset++; - continue; - } - const Instruction instr = {state.program_code[offset]}; - const auto opcode = OpCode::Decode(instr); - if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) { - offset++; - continue; - } - - switch (opcode->get().GetId()) { - case OpCode::Id::EXIT: { - const auto pred_index = static_cast<u32>(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - single_branch.address = exit_branch; - single_branch.kill = false; - single_branch.is_sync = false; - single_branch.is_brk = false; - single_branch.ignore = false; - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo<SingleBranch>( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::BRA: { - if (instr.bra.constant_buffer != 0) { - return {ParseResult::AbnormalFlow, parse_info}; - } - const auto pred_index = static_cast<u32>(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - const u32 branch_offset = offset + instr.bra.GetBranchTarget(); - if (branch_offset == 0) { - single_branch.address = exit_branch; - } else { - single_branch.address = branch_offset; - } - insert_label(state, branch_offset); - single_branch.kill = false; - single_branch.is_sync = false; - single_branch.is_brk = false; - single_branch.ignore = false; - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo<SingleBranch>( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, 
single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::SYNC: { - const auto pred_index = static_cast<u32>(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - single_branch.address = unassigned_branch; - single_branch.kill = false; - single_branch.is_sync = true; - single_branch.is_brk = false; - single_branch.ignore = false; - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo<SingleBranch>( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::BRK: { - const auto pred_index = static_cast<u32>(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - single_branch.address = unassigned_branch; - single_branch.kill = false; - single_branch.is_sync = false; - single_branch.is_brk = true; - single_branch.ignore = false; - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo<SingleBranch>( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::KIL: { - const auto pred_index = static_cast<u32>(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - single_branch.address = exit_branch; - single_branch.kill = true; - single_branch.is_sync = false; - single_branch.is_brk = false; - single_branch.ignore = false; - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo<SingleBranch>( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::SSY: { - const u32 target = offset + instr.bra.GetBranchTarget(); - insert_label(state, target); - state.ssy_labels.emplace(offset, target); - break; - } - case OpCode::Id::PBK: { - const u32 target = offset + instr.bra.GetBranchTarget(); - insert_label(state, target); - state.pbk_labels.emplace(offset, target); - break; - } - case OpCode::Id::BRX: { - const auto tmp = TrackBranchIndirectInfo(state, offset); - if (!tmp) { - LOG_WARNING(HW_GPU, "BRX Track Unsuccesful"); - return {ParseResult::AbnormalFlow, parse_info}; - } - - const auto result = *tmp; - const s32 pc_target = offset + result.relative_position; - std::vector<CaseBranch> branches; - for (u32 i = 0; i < result.entries; i++) { - auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4); - if (!key) { 
- return {ParseResult::AbnormalFlow, parse_info}; - } - u32 value = *key; - u32 target = static_cast<u32>((value >> 3) + pc_target); - insert_label(state, target); - branches.emplace_back(value, target); - } - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo<MultiBranch>( - static_cast<u32>(instr.gpr8.Value()), std::move(branches)); - - return {ParseResult::ControlCaught, parse_info}; - } - default: - break; - } - - offset++; - } - single_branch.kill = false; - single_branch.is_sync = false; - single_branch.is_brk = false; - parse_info.end_address = offset - 1; - parse_info.branch_info = MakeBranchInfo<SingleBranch>( - single_branch.condition, single_branch.address, single_branch.kill, single_branch.is_sync, - single_branch.is_brk, single_branch.ignore); - return {ParseResult::BlockEnd, parse_info}; -} - -bool TryInspectAddress(CFGRebuildState& state) { - if (state.inspect_queries.empty()) { - return false; - } - - const u32 address = state.inspect_queries.front(); - state.inspect_queries.pop_front(); - const auto [result, block_index] = TryGetBlock(state, address); - switch (result) { - case BlockCollision::Found: { - return true; - } - case BlockCollision::Inside: { - // This case is the tricky one: - // We need to split the block into 2 separate blocks - const u32 end = state.block_info[block_index].end; - BlockInfo& new_block = CreateBlockInfo(state, address, end); - BlockInfo& current_block = state.block_info[block_index]; - current_block.end = address - 1; - new_block.branch = std::move(current_block.branch); - BlockBranchInfo forward_branch = MakeBranchInfo<SingleBranch>(); - const auto branch = std::get_if<SingleBranch>(forward_branch.get()); - branch->address = address; - branch->ignore = true; - current_block.branch = std::move(forward_branch); - return true; - } - default: - break; - } - const auto [parse_result, parse_info] = ParseCode(state, address); - if (parse_result == ParseResult::AbnormalFlow) { - // if it's AbnormalFlow, we end it as false, ending the CFG reconstruction - return false; - } - - BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address); - block_info.branch = parse_info.branch_info; - if (std::holds_alternative<SingleBranch>(*block_info.branch)) { - const auto branch = std::get_if<SingleBranch>(block_info.branch.get()); - if (branch->condition.IsUnconditional()) { - return true; - } - const u32 fallthrough_address = parse_info.end_address + 1; - state.inspect_queries.push_front(fallthrough_address); - return true; - } - return true; -} - -bool TryQuery(CFGRebuildState& state) { - const auto gather_labels = [](std::stack<u32>& cc, std::map<u32, u32>& labels, - BlockInfo& block) { - auto gather_start = labels.lower_bound(block.start); - const auto gather_end = labels.upper_bound(block.end); - while (gather_start != gather_end) { - cc.push(gather_start->second); - ++gather_start; - } - }; - if (state.queries.empty()) { - return false; - } - - Query& q = state.queries.front(); - const u32 block_index = state.registered[q.address]; - BlockInfo& block = state.block_info[block_index]; - // If the block is visited, check if the stacks match, else gather the ssy/pbk - // labels into the current stack and look if the branch at the end of the block - // consumes a label. 
Schedule new queries accordingly - if (block.visited) { - BlockStack& stack = state.stacks[q.address]; - const bool all_okay = (stack.ssy_stack.empty() || q.ssy_stack == stack.ssy_stack) && - (stack.pbk_stack.empty() || q.pbk_stack == stack.pbk_stack); - state.queries.pop_front(); - return all_okay; - } - block.visited = true; - state.stacks.insert_or_assign(q.address, BlockStack{q}); - - Query q2(q); - state.queries.pop_front(); - gather_labels(q2.ssy_stack, state.ssy_labels, block); - gather_labels(q2.pbk_stack, state.pbk_labels, block); - if (std::holds_alternative<SingleBranch>(*block.branch)) { - auto* branch = std::get_if<SingleBranch>(block.branch.get()); - if (!branch->condition.IsUnconditional()) { - q2.address = block.end + 1; - state.queries.push_back(q2); - } - - auto& conditional_query = state.queries.emplace_back(q2); - if (branch->is_sync) { - if (branch->address == unassigned_branch) { - branch->address = conditional_query.ssy_stack.top(); - } - conditional_query.ssy_stack.pop(); - } - if (branch->is_brk) { - if (branch->address == unassigned_branch) { - branch->address = conditional_query.pbk_stack.top(); - } - conditional_query.pbk_stack.pop(); - } - conditional_query.address = branch->address; - return true; - } - - const auto* multi_branch = std::get_if<MultiBranch>(block.branch.get()); - for (const auto& branch_case : multi_branch->branches) { - auto& conditional_query = state.queries.emplace_back(q2); - conditional_query.address = branch_case.address; - } - - return true; -} - -void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { - const auto get_expr = [](const Condition& cond) -> Expr { - Expr result; - if (cond.cc != ConditionCode::T) { - result = MakeExpr<ExprCondCode>(cond.cc); - } - if (cond.predicate != Pred::UnusedIndex) { - u32 pred = static_cast<u32>(cond.predicate); - bool negate = false; - if (pred > 7) { - negate = true; - pred -= 8; - } - Expr extra = MakeExpr<ExprPredicate>(pred); - if (negate) { - extra = MakeExpr<ExprNot>(std::move(extra)); - } - if (result) { - return MakeExpr<ExprAnd>(std::move(extra), std::move(result)); - } - return extra; - } - if (result) { - return result; - } - return MakeExpr<ExprBoolean>(true); - }; - - if (std::holds_alternative<SingleBranch>(*branch_info)) { - const auto* branch = std::get_if<SingleBranch>(branch_info.get()); - if (branch->address < 0) { - if (branch->kill) { - mm.InsertReturn(get_expr(branch->condition), true); - return; - } - mm.InsertReturn(get_expr(branch->condition), false); - return; - } - mm.InsertGoto(get_expr(branch->condition), branch->address); - return; - } - const auto* multi_branch = std::get_if<MultiBranch>(branch_info.get()); - for (const auto& branch_case : multi_branch->branches) { - mm.InsertGoto(MakeExpr<ExprGprEqual>(multi_branch->gpr, branch_case.cmp_value), - branch_case.address); - } -} - -void DecompileShader(CFGRebuildState& state) { - state.manager->Init(); - for (auto label : state.labels) { - state.manager->DeclareLabel(label); - } - for (const auto& block : state.block_info) { - if (state.labels.contains(block.start)) { - state.manager->InsertLabel(block.start); - } - const bool ignore = BlockBranchIsIgnored(block.branch); - const u32 end = ignore ? 
block.end + 1 : block.end; - state.manager->InsertBlock(block.start, end); - if (!ignore) { - InsertBranch(*state.manager, block.branch); - } - } - state.manager->Decompile(); -} - -} // Anonymous namespace - -std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, - const CompilerSettings& settings, - Registry& registry) { - auto result_out = std::make_unique<ShaderCharacteristics>(); - if (settings.depth == CompileDepth::BruteForce) { - result_out->settings.depth = CompileDepth::BruteForce; - return result_out; - } - - CFGRebuildState state{program_code, start_address, registry}; - // Inspect Code and generate blocks - state.labels.clear(); - state.labels.emplace(start_address); - state.inspect_queries.push_back(state.start); - while (!state.inspect_queries.empty()) { - if (!TryInspectAddress(state)) { - result_out->settings.depth = CompileDepth::BruteForce; - return result_out; - } - } - - bool use_flow_stack = true; - - bool decompiled = false; - - if (settings.depth != CompileDepth::FlowStack) { - // Decompile Stacks - state.queries.push_back(Query{state.start, {}, {}}); - decompiled = true; - while (!state.queries.empty()) { - if (!TryQuery(state)) { - decompiled = false; - break; - } - } - } - - use_flow_stack = !decompiled; - - // Sort and organize results - std::sort(state.block_info.begin(), state.block_info.end(), - [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; }); - if (decompiled && settings.depth != CompileDepth::NoFlowStack) { - ASTManager manager{settings.depth != CompileDepth::DecompileBackwards, - settings.disable_else_derivation}; - state.manager = &manager; - DecompileShader(state); - decompiled = state.manager->IsFullyDecompiled(); - if (!decompiled) { - if (settings.depth == CompileDepth::FullDecompile) { - LOG_CRITICAL(HW_GPU, "Failed to remove all the gotos!:"); - } else { - LOG_CRITICAL(HW_GPU, "Failed to remove all backward gotos!:"); - } - state.manager->ShowCurrentState("Of Shader"); - state.manager->Clear(); - } else { - auto characteristics = std::make_unique<ShaderCharacteristics>(); - characteristics->start = start_address; - characteristics->settings.depth = settings.depth; - characteristics->manager = std::move(manager); - characteristics->end = state.block_info.back().end + 1; - return characteristics; - } - } - - result_out->start = start_address; - result_out->settings.depth = - use_flow_stack ? 
CompileDepth::FlowStack : CompileDepth::NoFlowStack; - result_out->blocks.clear(); - for (auto& block : state.block_info) { - ShaderBlock new_block{}; - new_block.start = block.start; - new_block.end = block.end; - new_block.ignore_branch = BlockBranchIsIgnored(block.branch); - if (!new_block.ignore_branch) { - new_block.branch = block.branch; - } - result_out->end = std::max(result_out->end, block.end); - result_out->blocks.push_back(new_block); - } - if (!use_flow_stack) { - result_out->labels = std::move(state.labels); - return result_out; - } - - auto back = result_out->blocks.begin(); - auto next = std::next(back); - while (next != result_out->blocks.end()) { - if (!state.labels.contains(next->start) && next->start == back->end + 1) { - back->end = next->end; - next = result_out->blocks.erase(next); - continue; - } - back = next; - ++next; - } - - return result_out; -} -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h deleted file mode 100644 index 37bf96492..000000000 --- a/src/video_core/shader/control_flow.h +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <list> -#include <optional> -#include <set> -#include <variant> - -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/compiler_settings.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::ConditionCode; -using Tegra::Shader::Pred; - -constexpr s32 exit_branch = -1; - -struct Condition { - Pred predicate{Pred::UnusedIndex}; - ConditionCode cc{ConditionCode::T}; - - bool IsUnconditional() const { - return predicate == Pred::UnusedIndex && cc == ConditionCode::T; - } - - bool operator==(const Condition& other) const { - return std::tie(predicate, cc) == std::tie(other.predicate, other.cc); - } - - bool operator!=(const Condition& other) const { - return !operator==(other); - } -}; - -class SingleBranch { -public: - SingleBranch() = default; - explicit SingleBranch(Condition condition_, s32 address_, bool kill_, bool is_sync_, - bool is_brk_, bool ignore_) - : condition{condition_}, address{address_}, kill{kill_}, is_sync{is_sync_}, is_brk{is_brk_}, - ignore{ignore_} {} - - bool operator==(const SingleBranch& b) const { - return std::tie(condition, address, kill, is_sync, is_brk, ignore) == - std::tie(b.condition, b.address, b.kill, b.is_sync, b.is_brk, b.ignore); - } - - bool operator!=(const SingleBranch& b) const { - return !operator==(b); - } - - Condition condition{}; - s32 address{exit_branch}; - bool kill{}; - bool is_sync{}; - bool is_brk{}; - bool ignore{}; -}; - -struct CaseBranch { - explicit CaseBranch(u32 cmp_value_, u32 address_) : cmp_value{cmp_value_}, address{address_} {} - u32 cmp_value; - u32 address; -}; - -class MultiBranch { -public: - explicit MultiBranch(u32 gpr_, std::vector<CaseBranch>&& branches_) - : gpr{gpr_}, branches{std::move(branches_)} {} - - u32 gpr{}; - std::vector<CaseBranch> branches{}; -}; - -using BranchData = std::variant<SingleBranch, MultiBranch>; -using BlockBranchInfo = std::shared_ptr<BranchData>; - -bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second); - -struct ShaderBlock { - u32 start{}; - u32 end{}; - bool ignore_branch{}; - BlockBranchInfo branch{}; - - bool operator==(const ShaderBlock& sb) 
const { - return std::tie(start, end, ignore_branch) == - std::tie(sb.start, sb.end, sb.ignore_branch) && - BlockBranchInfoAreEqual(branch, sb.branch); - } - - bool operator!=(const ShaderBlock& sb) const { - return !operator==(sb); - } -}; - -struct ShaderCharacteristics { - std::list<ShaderBlock> blocks{}; - std::set<u32> labels{}; - u32 start{}; - u32 end{}; - ASTManager manager{true, true}; - CompilerSettings settings{}; -}; - -std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, - const CompilerSettings& settings, - Registry& registry); - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp deleted file mode 100644 index 6576d1208..000000000 --- a/src/video_core/shader/decode.cpp +++ /dev/null @@ -1,368 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <cstring> -#include <limits> -#include <set> - -#include <fmt/format.h> - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/engines/shader_header.h" -#include "video_core/shader/control_flow.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -namespace { - -void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, - const std::list<SamplerEntry>& used_samplers) { - if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) { - return; - } - u32 count{}; - std::vector<u32> bound_offsets; - for (const auto& sampler : used_samplers) { - if (sampler.is_bindless) { - continue; - } - ++count; - bound_offsets.emplace_back(sampler.offset); - } - if (count > 1) { - gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets)); - } -} - -std::optional<u32> TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce, - VideoCore::GuestDriverProfile& gpu_driver, - const std::list<SamplerEntry>& used_samplers) { - const u32 base_offset = sampler_to_deduce.offset; - u32 max_offset{std::numeric_limits<u32>::max()}; - for (const auto& sampler : used_samplers) { - if (sampler.is_bindless) { - continue; - } - if (sampler.offset > base_offset) { - max_offset = std::min(sampler.offset, max_offset); - } - } - if (max_offset == std::numeric_limits<u32>::max()) { - return std::nullopt; - } - return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize(); -} - -} // Anonymous namespace - -class ASTDecoder { -public: - explicit ASTDecoder(ShaderIR& ir_) : ir(ir_) {} - - void operator()(ASTProgram& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(ASTIfThen& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(ASTIfElse& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(ASTBlockEncoded& ast) {} - - void operator()(ASTBlockDecoded& ast) {} - - void operator()(ASTVarSet& ast) {} - - void operator()(ASTLabel& ast) {} - - void operator()(ASTGoto& ast) {} - - void operator()(ASTDoWhile& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); 
- current = current->GetNext(); - } - } - - void operator()(ASTReturn& ast) {} - - void operator()(ASTBreak& ast) {} - - void Visit(ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - if (node->IsBlockEncoded()) { - auto block = std::get_if<ASTBlockEncoded>(node->GetInnerData()); - NodeBlock bb = ir.DecodeRange(block->start, block->end); - node->TransformBlockEncoded(std::move(bb)); - } - } - -private: - ShaderIR& ir; -}; - -void ShaderIR::Decode() { - std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); - - decompiled = false; - auto info = ScanFlow(program_code, main_offset, settings, registry); - auto& shader_info = *info; - coverage_begin = shader_info.start; - coverage_end = shader_info.end; - switch (shader_info.settings.depth) { - case CompileDepth::FlowStack: { - for (const auto& block : shader_info.blocks) { - basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); - } - break; - } - case CompileDepth::NoFlowStack: { - disable_flow_stack = true; - const auto insert_block = [this](NodeBlock& nodes, u32 label) { - if (label == static_cast<u32>(exit_branch)) { - return; - } - basic_blocks.insert({label, nodes}); - }; - const auto& blocks = shader_info.blocks; - NodeBlock current_block; - u32 current_label = static_cast<u32>(exit_branch); - for (const auto& block : blocks) { - if (shader_info.labels.contains(block.start)) { - insert_block(current_block, current_label); - current_block.clear(); - current_label = block.start; - } - if (!block.ignore_branch) { - DecodeRangeInner(current_block, block.start, block.end); - InsertControlFlow(current_block, block); - } else { - DecodeRangeInner(current_block, block.start, block.end + 1); - } - } - insert_block(current_block, current_label); - break; - } - case CompileDepth::DecompileBackwards: - case CompileDepth::FullDecompile: { - program_manager = std::move(shader_info.manager); - disable_flow_stack = true; - decompiled = true; - ASTDecoder decoder{*this}; - ASTNode program = GetASTProgram(); - decoder.Visit(program); - break; - } - default: - LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!"); - [[fallthrough]]; - case CompileDepth::BruteForce: { - const auto shader_end = static_cast<u32>(program_code.size()); - coverage_begin = main_offset; - coverage_end = shader_end; - for (u32 label = main_offset; label < shader_end; ++label) { - basic_blocks.insert({label, DecodeRange(label, label + 1)}); - } - break; - } - } - if (settings.depth != shader_info.settings.depth) { - LOG_WARNING( - HW_GPU, "Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"", - CompileDepthAsString(settings.depth), CompileDepthAsString(shader_info.settings.depth)); - } -} - -NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { - NodeBlock basic_block; - DecodeRangeInner(basic_block, begin, end); - return basic_block; -} - -void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) { - for (u32 pc = begin; pc < (begin > end ? 
MAX_PROGRAM_LENGTH : end);) { - pc = DecodeInstr(bb, pc); - } -} - -void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { - const auto apply_conditions = [&](const Condition& cond, Node n) -> Node { - Node result = n; - if (cond.cc != ConditionCode::T) { - result = Conditional(GetConditionCode(cond.cc), {result}); - } - if (cond.predicate != Pred::UnusedIndex) { - u32 pred = static_cast<u32>(cond.predicate); - const bool is_neg = pred > 7; - if (is_neg) { - pred -= 8; - } - result = Conditional(GetPredicate(pred, is_neg), {result}); - } - return result; - }; - if (std::holds_alternative<SingleBranch>(*block.branch)) { - auto branch = std::get_if<SingleBranch>(block.branch.get()); - if (branch->address < 0) { - if (branch->kill) { - Node n = Operation(OperationCode::Discard); - n = apply_conditions(branch->condition, n); - bb.push_back(n); - global_code.push_back(n); - return; - } - Node n = Operation(OperationCode::Exit); - n = apply_conditions(branch->condition, n); - bb.push_back(n); - global_code.push_back(n); - return; - } - Node n = Operation(OperationCode::Branch, Immediate(branch->address)); - n = apply_conditions(branch->condition, n); - bb.push_back(n); - global_code.push_back(n); - return; - } - auto multi_branch = std::get_if<MultiBranch>(block.branch.get()); - Node op_a = GetRegister(multi_branch->gpr); - for (auto& branch_case : multi_branch->branches) { - Node n = Operation(OperationCode::Branch, Immediate(branch_case.address)); - Node op_b = Immediate(branch_case.cmp_value); - Node condition = - GetPredicateComparisonInteger(Tegra::Shader::PredCondition::EQ, false, op_a, op_b); - auto result = Conditional(condition, {n}); - bb.push_back(result); - global_code.push_back(result); - } -} - -u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { - // Ignore sched instructions when generating code. 
- if (IsSchedInstruction(pc, main_offset)) { - return pc + 1; - } - - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - const u32 nv_address = ConvertAddressToNvidiaSpace(pc); - - // Decoding failure - if (!opcode) { - UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value); - bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})", - nv_address, instr.value))); - return pc + 1; - } - - bb.push_back(Comment( - fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value))); - - using Tegra::Shader::Pred; - UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, - "NeverExecute predicate not implemented"); - - static const std::map<OpCode::Type, u32 (ShaderIR::*)(NodeBlock&, u32)> decoders = { - {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic}, - {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate}, - {OpCode::Type::Bfe, &ShaderIR::DecodeBfe}, - {OpCode::Type::Bfi, &ShaderIR::DecodeBfi}, - {OpCode::Type::Shift, &ShaderIR::DecodeShift}, - {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger}, - {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate}, - {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf}, - {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate}, - {OpCode::Type::Ffma, &ShaderIR::DecodeFfma}, - {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, - {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, - {OpCode::Type::Warp, &ShaderIR::DecodeWarp}, - {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, - {OpCode::Type::Texture, &ShaderIR::DecodeTexture}, - {OpCode::Type::Image, &ShaderIR::DecodeImage}, - {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, - {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, - {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, - {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister}, - {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate}, - {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate}, - {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet}, - {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet}, - {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet}, - {OpCode::Type::Video, &ShaderIR::DecodeVideo}, - {OpCode::Type::Xmad, &ShaderIR::DecodeXmad}, - }; - - std::vector<Node> tmp_block; - if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) { - pc = (this->*decoder->second)(tmp_block, pc); - } else { - pc = DecodeOther(tmp_block, pc); - } - - // Some instructions (like SSY) don't have a predicate field, they are always unconditionally - // executed. 
- const bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId()); - const auto pred_index = static_cast<u32>(instr.pred.pred_index); - - if (can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex)) { - const Node conditional = - Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block)); - global_code.push_back(conditional); - bb.push_back(conditional); - } else { - for (auto& node : tmp_block) { - global_code.push_back(node); - bb.push_back(node); - } - } - - return pc + 1; -} - -void ShaderIR::PostDecode() { - // Deduce texture handler size if needed - auto gpu_driver = registry.AccessGuestDriverProfile(); - DeduceTextureHandlerSize(gpu_driver, used_samplers); - // Deduce Indexed Samplers - if (!uses_indexed_samplers) { - return; - } - for (auto& sampler : used_samplers) { - if (!sampler.is_indexed) { - continue; - } - if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) { - sampler.size = *size; - } else { - LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler"); - sampler.size = 1; - } - } -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp deleted file mode 100644 index 15eb700e7..000000000 --- a/src/video_core/shader/decode/arithmetic.cpp +++ /dev/null @@ -1,166 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::SubOp; - -u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - - Node op_b = [&] { - if (instr.is_b_imm) { - return GetImmediate19(instr); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - switch (opcode->get().GetId()) { - case OpCode::Id::MOV_C: - case OpCode::Id::MOV_R: { - // MOV does not have neither 'abs' nor 'neg' bits. - SetRegister(bb, instr.gpr0, op_b); - break; - } - case OpCode::Id::FMUL_C: - case OpCode::Id::FMUL_R: - case OpCode::Id::FMUL_IMM: { - // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. - if (instr.fmul.tab5cb8_2 != 0) { - LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented", - instr.fmul.tab5cb8_2.Value()); - } - if (instr.fmul.tab5c68_0 != 1) { - LOG_DEBUG(HW_GPU, "FMUL tab5cb8_0({}) is not implemented", - instr.fmul.tab5c68_0.Value()); - } - - op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); - - static constexpr std::array FmulPostFactor = { - 1.000f, // None - 0.500f, // Divide 2 - 0.250f, // Divide 4 - 0.125f, // Divide 8 - 8.000f, // Mul 8 - 4.000f, // Mul 4 - 2.000f, // Mul 2 - }; - - if (instr.fmul.postfactor != 0) { - op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a, - Immediate(FmulPostFactor[instr.fmul.postfactor])); - } - - // TODO(Rodrigo): Should precise be used when there's a postfactor? 
- Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b); - - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FADD_C: - case OpCode::Id::FADD_R: - case OpCode::Id::FADD_IMM: { - op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); - op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); - - Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::MUFU: { - op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); - - Node value = [&]() { - switch (instr.sub_op) { - case SubOp::Cos: - return Operation(OperationCode::FCos, PRECISE, op_a); - case SubOp::Sin: - return Operation(OperationCode::FSin, PRECISE, op_a); - case SubOp::Ex2: - return Operation(OperationCode::FExp2, PRECISE, op_a); - case SubOp::Lg2: - return Operation(OperationCode::FLog2, PRECISE, op_a); - case SubOp::Rcp: - return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a); - case SubOp::Rsq: - return Operation(OperationCode::FInverseSqrt, PRECISE, op_a); - case SubOp::Sqrt: - return Operation(OperationCode::FSqrt, PRECISE, op_a); - default: - UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", instr.sub_op.Value()); - return Immediate(0); - } - }(); - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FMNMX_C: - case OpCode::Id::FMNMX_R: - case OpCode::Id::FMNMX_IMM: { - op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); - op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); - - const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0); - - const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b); - const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b); - const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FCMP_RR: - case OpCode::Id::FCMP_RC: - case OpCode::Id::FCMP_IMMR: { - UNIMPLEMENTED_IF(instr.fcmp.ftz == 0); - Node op_c = GetRegister(instr.gpr39); - Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f)); - SetRegister( - bb, instr.gpr0, - Operation(OperationCode::Select, std::move(comp), std::move(op_a), std::move(op_b))); - break; - } - case OpCode::Id::RRO_C: - case OpCode::Id::RRO_R: - case OpCode::Id::RRO_IMM: { - LOG_DEBUG(HW_GPU, "(STUBBED) RRO used"); - - // Currently RRO is only implemented as a register move. 
- op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); - SetRegister(bb, instr.gpr0, op_b); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp deleted file mode 100644 index 88103fede..000000000 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::HalfType; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - bool negate_a = false; - bool negate_b = false; - bool absolute_a = false; - bool absolute_b = false; - - switch (opcode->get().GetId()) { - case OpCode::Id::HADD2_R: - if (instr.alu_half.ftz == 0) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - negate_a = ((instr.value >> 43) & 1) != 0; - negate_b = ((instr.value >> 31) & 1) != 0; - absolute_a = ((instr.value >> 44) & 1) != 0; - absolute_b = ((instr.value >> 30) & 1) != 0; - break; - case OpCode::Id::HADD2_C: - if (instr.alu_half.ftz == 0) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - negate_a = ((instr.value >> 43) & 1) != 0; - negate_b = ((instr.value >> 56) & 1) != 0; - absolute_a = ((instr.value >> 44) & 1) != 0; - absolute_b = ((instr.value >> 54) & 1) != 0; - break; - case OpCode::Id::HMUL2_R: - negate_a = ((instr.value >> 43) & 1) != 0; - absolute_a = ((instr.value >> 44) & 1) != 0; - absolute_b = ((instr.value >> 30) & 1) != 0; - break; - case OpCode::Id::HMUL2_C: - negate_b = ((instr.value >> 31) & 1) != 0; - absolute_a = ((instr.value >> 44) & 1) != 0; - absolute_b = ((instr.value >> 54) & 1) != 0; - break; - default: - UNREACHABLE(); - break; - } - - Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a); - op_a = GetOperandAbsNegHalf(op_a, absolute_a, negate_a); - - auto [type_b, op_b] = [this, instr, opcode]() -> std::pair<HalfType, Node> { - switch (opcode->get().GetId()) { - case OpCode::Id::HADD2_C: - case OpCode::Id::HMUL2_C: - return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - case OpCode::Id::HADD2_R: - case OpCode::Id::HMUL2_R: - return {instr.alu_half.type_b, GetRegister(instr.gpr20)}; - default: - UNREACHABLE(); - return {HalfType::F32, Immediate(0)}; - } - }(); - op_b = UnpackHalfFloat(op_b, type_b); - op_b = GetOperandAbsNegHalf(op_b, absolute_b, negate_b); - - Node value = [this, opcode, op_a, op_b = op_b] { - switch (opcode->get().GetId()) { - case OpCode::Id::HADD2_C: - case OpCode::Id::HADD2_R: - return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); - case OpCode::Id::HMUL2_C: - case OpCode::Id::HMUL2_R: - return Operation(OperationCode::HMul, PRECISE, op_a, op_b); - default: - UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); - return Immediate(0); - } - }(); 
- value = GetSaturatedHalfFloat(value, instr.alu_half.saturate); - value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge); - - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp deleted file mode 100644 index d179b9873..000000000 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) { - if (instr.alu_half_imm.ftz == 0) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - } else { - if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::FTZ) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - } - - Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); - op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a); - - const Node op_b = UnpackHalfImmediate(instr, true); - - Node value = [&]() { - switch (opcode->get().GetId()) { - case OpCode::Id::HADD2_IMM: - return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); - case OpCode::Id::HMUL2_IMM: - return Operation(OperationCode::HMul, PRECISE, op_a, op_b); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - value = GetSaturatedHalfFloat(value, instr.alu_half_imm.saturate); - value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge); - SetRegister(bb, instr.gpr0, value); - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp deleted file mode 100644 index f1875967c..000000000 --- a/src/video_core/shader/decode/arithmetic_immediate.cpp +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - switch (opcode->get().GetId()) { - case OpCode::Id::MOV32_IMM: { - SetRegister(bb, instr.gpr0, GetImmediate32(instr)); - break; - } - case OpCode::Id::FMUL32_IMM: { - Node value = - Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr)); - value = GetSaturatedFloat(value, instr.fmul32.saturate); - - SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FADD32I: { - const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a, - instr.fadd32i.negate_a); - const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b, - instr.fadd32i.negate_b); - - const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); - SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}", - opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp deleted file mode 100644 index 7b5bb7003..000000000 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ /dev/null @@ -1,375 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::IAdd3Height; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; -using Tegra::Shader::Register; - -u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = [&]() { - if (instr.is_b_imm) { - return Immediate(instr.alu.GetSignedImm20_20()); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - switch (opcode->get().GetId()) { - case OpCode::Id::IADD_C: - case OpCode::Id::IADD_R: - case OpCode::Id::IADD_IMM: { - UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD.SAT"); - UNIMPLEMENTED_IF_MSG(instr.iadd.x && instr.generates_cc, "IADD.X Rd.CC"); - - op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); - op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); - - Node value = Operation(OperationCode::UAdd, op_a, op_b); - - if (instr.iadd.x) { - Node carry = GetInternalFlag(InternalFlag::Carry); - Node x = Operation(OperationCode::Select, std::move(carry), Immediate(1), Immediate(0)); - value = Operation(OperationCode::UAdd, std::move(value), std::move(x)); - } - - if (instr.generates_cc) { - const Node i0 = Immediate(0); - - Node zero = Operation(OperationCode::LogicalIEqual, value, i0); - Node sign = Operation(OperationCode::LogicalILessThan, value, i0); - Node carry = Operation(OperationCode::LogicalAddCarry, op_a, op_b); - - Node pos_a = Operation(OperationCode::LogicalIGreaterThan, op_a, i0); - Node pos_b = Operation(OperationCode::LogicalIGreaterThan, op_b, i0); - Node pos = Operation(OperationCode::LogicalAnd, std::move(pos_a), std::move(pos_b)); - Node overflow = Operation(OperationCode::LogicalAnd, pos, sign); - - SetInternalFlag(bb, InternalFlag::Zero, std::move(zero)); - SetInternalFlag(bb, InternalFlag::Sign, std::move(sign)); - SetInternalFlag(bb, InternalFlag::Carry, std::move(carry)); - SetInternalFlag(bb, InternalFlag::Overflow, std::move(overflow)); - } - SetRegister(bb, instr.gpr0, std::move(value)); - break; - } - case OpCode::Id::IADD3_C: - case OpCode::Id::IADD3_R: - case OpCode::Id::IADD3_IMM: { - Node op_c = GetRegister(instr.gpr39); - - const auto ApplyHeight = [&](IAdd3Height height, Node value) { - switch (height) { - case IAdd3Height::None: - return value; - case IAdd3Height::LowerHalfWord: - return BitfieldExtract(value, 0, 16); - case IAdd3Height::UpperHalfWord: - return BitfieldExtract(value, 16, 16); - default: - UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", height); - return Immediate(0); - } - }; - - if (opcode->get().GetId() == OpCode::Id::IADD3_R) { - op_a = ApplyHeight(instr.iadd3.height_a, op_a); - op_b = ApplyHeight(instr.iadd3.height_b, op_b); - op_c = ApplyHeight(instr.iadd3.height_c, op_c); - } - - op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd3.neg_a, true); - op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true); - op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true); - - const Node value = [&] { - Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b); - if (opcode->get().GetId() != 
OpCode::Id::IADD3_R) { - return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c); - } - const Node shifted = [&] { - switch (instr.iadd3.mode) { - case Tegra::Shader::IAdd3Mode::RightShift: - // TODO(tech4me): According to - // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3 - // The addition between op_a and op_b should be done in uint33, more - // investigation required - return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, add_ab, - Immediate(16)); - case Tegra::Shader::IAdd3Mode::LeftShift: - return Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, add_ab, - Immediate(16)); - default: - return add_ab; - } - }(); - return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c); - }(); - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::ISCADD_C: - case OpCode::Id::ISCADD_R: - case OpCode::Id::ISCADD_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in ISCADD is not implemented"); - - op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); - op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); - - const Node shift = Immediate(static_cast<u32>(instr.alu_integer.shift_amount)); - const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift); - const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b); - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::POPC_C: - case OpCode::Id::POPC_R: - case OpCode::Id::POPC_IMM: { - if (instr.popc.invert) { - op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); - } - const Node value = Operation(OperationCode::IBitCount, PRECISE, op_b); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FLO_R: - case OpCode::Id::FLO_C: - case OpCode::Id::FLO_IMM: { - Node value; - if (instr.flo.invert) { - op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b)); - } - if (instr.flo.is_signed) { - value = Operation(OperationCode::IBitMSB, NO_PRECISE, std::move(op_b)); - } else { - value = Operation(OperationCode::UBitMSB, NO_PRECISE, std::move(op_b)); - } - if (instr.flo.sh) { - value = - Operation(OperationCode::UBitwiseXor, NO_PRECISE, std::move(value), Immediate(31)); - } - SetRegister(bb, instr.gpr0, std::move(value)); - break; - } - case OpCode::Id::SEL_C: - case OpCode::Id::SEL_R: - case OpCode::Id::SEL_IMM: { - const Node condition = GetPredicate(instr.sel.pred, instr.sel.neg_pred != 0); - const Node value = Operation(OperationCode::Select, PRECISE, condition, op_a, op_b); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::ICMP_CR: - case OpCode::Id::ICMP_R: - case OpCode::Id::ICMP_RC: - case OpCode::Id::ICMP_IMM: { - const Node zero = Immediate(0); - - const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> { - switch (opcode->get().GetId()) { - case OpCode::Id::ICMP_CR: - return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), - GetRegister(instr.gpr39)}; - case OpCode::Id::ICMP_R: - return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; - case OpCode::Id::ICMP_RC: - return {GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - case OpCode::Id::ICMP_IMM: - return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; - default: - UNREACHABLE(); - return {zero, 
zero}; - } - }(); - const Node op_lhs = GetRegister(instr.gpr8); - const Node comparison = - GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero); - SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_lhs, op_rhs)); - break; - } - case OpCode::Id::LOP_C: - case OpCode::Id::LOP_R: - case OpCode::Id::LOP_IMM: { - if (instr.alu.lop.invert_a) - op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a); - if (instr.alu.lop.invert_b) - op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); - - WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b, - instr.alu.lop.pred_result_mode, instr.alu.lop.pred48, - instr.generates_cc); - break; - } - case OpCode::Id::LOP3_C: - case OpCode::Id::LOP3_R: - case OpCode::Id::LOP3_IMM: { - const Node op_c = GetRegister(instr.gpr39); - const Node lut = [&]() { - if (opcode->get().GetId() == OpCode::Id::LOP3_R) { - return Immediate(instr.alu.lop3.GetImmLut28()); - } else { - return Immediate(instr.alu.lop3.GetImmLut48()); - } - }(); - - WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc); - break; - } - case OpCode::Id::IMNMX_C: - case OpCode::Id::IMNMX_R: - case OpCode::Id::IMNMX_IMM: { - UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None); - - const bool is_signed = instr.imnmx.is_signed; - - const Node condition = GetPredicate(instr.imnmx.pred, instr.imnmx.negate_pred != 0); - const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b); - const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b); - const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::LEA_R2: - case OpCode::Id::LEA_R1: - case OpCode::Id::LEA_IMM: - case OpCode::Id::LEA_RZ: - case OpCode::Id::LEA_HI: { - auto [op_a_, op_b_, op_c_] = [&]() -> std::tuple<Node, Node, Node> { - switch (opcode->get().GetId()) { - case OpCode::Id::LEA_R2: { - return {GetRegister(instr.gpr20), GetRegister(instr.gpr39), - Immediate(static_cast<u32>(instr.lea.r2.entry_a))}; - } - case OpCode::Id::LEA_R1: { - const bool neg = instr.lea.r1.neg != 0; - return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), - GetRegister(instr.gpr20), - Immediate(static_cast<u32>(instr.lea.r1.entry_a))}; - } - case OpCode::Id::LEA_IMM: { - const bool neg = instr.lea.imm.neg != 0; - return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), - Immediate(static_cast<u32>(instr.lea.imm.entry_a)), - Immediate(static_cast<u32>(instr.lea.imm.entry_b))}; - } - case OpCode::Id::LEA_RZ: { - const bool neg = instr.lea.rz.neg != 0; - return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset), - GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), - Immediate(static_cast<u32>(instr.lea.rz.entry_a))}; - } - case OpCode::Id::LEA_HI: - default: - UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName()); - - return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), GetRegister(instr.gpr8), - Immediate(static_cast<u32>(instr.lea.imm.entry_b))}; - } - }(); - - UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex), - "Unhandled LEA Predicate"); - - Node value = - Operation(OperationCode::ILogicalShiftLeft, std::move(op_a_), std::move(op_c_)); - value = Operation(OperationCode::IAdd, 
std::move(op_b_), std::move(value)); - SetRegister(bb, instr.gpr0, std::move(value)); - - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c, - Node imm_lut, bool sets_cc) { - const Node lop3_fast = [&](const Node na, const Node nb, const Node nc, const Node ttbl) { - Node value = Immediate(0); - const ImmediateNode imm = std::get<ImmediateNode>(*ttbl); - if (imm.GetValue() & 0x01) { - const Node a = Operation(OperationCode::IBitwiseNot, na); - const Node b = Operation(OperationCode::IBitwiseNot, nb); - const Node c = Operation(OperationCode::IBitwiseNot, nc); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x02) { - const Node a = Operation(OperationCode::IBitwiseNot, na); - const Node b = Operation(OperationCode::IBitwiseNot, nb); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x04) { - const Node a = Operation(OperationCode::IBitwiseNot, na); - const Node c = Operation(OperationCode::IBitwiseNot, nc); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x08) { - const Node a = Operation(OperationCode::IBitwiseNot, na); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x10) { - const Node b = Operation(OperationCode::IBitwiseNot, nb); - const Node c = Operation(OperationCode::IBitwiseNot, nc); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x20) { - const Node b = Operation(OperationCode::IBitwiseNot, nb); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x40) { - const Node c = Operation(OperationCode::IBitwiseNot, nc); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x80) { - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - return value; - }(op_a, op_b, op_c, imm_lut); - - SetInternalFlagsFromInteger(bb, lop3_fast, sets_cc); - SetRegister(bb, dest, lop3_fast); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp deleted file mode 100644 index 73580277a..000000000 --- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright 
2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::LogicOperation; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; -using Tegra::Shader::PredicateResultMode; -using Tegra::Shader::Register; - -u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = Immediate(static_cast<s32>(instr.alu.imm20_32)); - - switch (opcode->get().GetId()) { - case OpCode::Id::IADD32I: { - UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented"); - - op_a = GetOperandAbsNegInteger(std::move(op_a), false, instr.iadd32i.negate_a != 0, true); - - Node value = Operation(OperationCode::IAdd, PRECISE, std::move(op_a), std::move(op_b)); - - SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc != 0); - SetRegister(bb, instr.gpr0, std::move(value)); - break; - } - case OpCode::Id::LOP32I: { - if (instr.alu.lop32i.invert_a) { - op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_a)); - } - - if (instr.alu.lop32i.invert_b) { - op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b)); - } - - WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, std::move(op_a), - std::move(op_b), PredicateResultMode::None, Pred::UnusedIndex, - instr.op_32.generates_cc != 0); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}", - opcode->get().GetName()); - } - - return pc; -} - -void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a, - Node op_b, PredicateResultMode predicate_mode, Pred predicate, - bool sets_cc) { - Node result = [&] { - switch (logic_op) { - case LogicOperation::And: - return Operation(OperationCode::IBitwiseAnd, PRECISE, std::move(op_a), std::move(op_b)); - case LogicOperation::Or: - return Operation(OperationCode::IBitwiseOr, PRECISE, std::move(op_a), std::move(op_b)); - case LogicOperation::Xor: - return Operation(OperationCode::IBitwiseXor, PRECISE, std::move(op_a), std::move(op_b)); - case LogicOperation::PassB: - return op_b; - default: - UNIMPLEMENTED_MSG("Unimplemented logic operation={}", logic_op); - return Immediate(0); - } - }(); - - SetInternalFlagsFromInteger(bb, result, sets_cc); - SetRegister(bb, dest, result); - - // Write the predicate value depending on the predicate mode. - switch (predicate_mode) { - case PredicateResultMode::None: - // Do nothing. - return; - case PredicateResultMode::NotZero: { - // Set the predicate to true if the result is not zero. 
- Node compare = Operation(OperationCode::LogicalINotEqual, std::move(result), Immediate(0)); - SetPredicate(bb, static_cast<u64>(predicate), std::move(compare)); - break; - } - default: - UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", predicate_mode); - } -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp deleted file mode 100644 index 8e3b46e8e..000000000 --- a/src/video_core/shader/decode/bfe.cpp +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = [&] { - switch (opcode->get().GetId()) { - case OpCode::Id::BFE_R: - return GetRegister(instr.gpr20); - case OpCode::Id::BFE_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::BFE_IMM: - return Immediate(instr.alu.GetSignedImm20_20()); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition codes in BFE is not implemented"); - - const bool is_signed = instr.bfe.is_signed; - - // using reverse parallel method in - // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel - // note for later if possible to implement faster method. - if (instr.bfe.brev) { - const auto swap = [&](u32 s, u32 mask) { - Node v1 = - SignedOperation(OperationCode::ILogicalShiftRight, is_signed, op_a, Immediate(s)); - if (mask != 0) { - v1 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v1), - Immediate(mask)); - } - Node v2 = op_a; - if (mask != 0) { - v2 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v2), - Immediate(mask)); - } - v2 = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(v2), - Immediate(s)); - return SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(v1), - std::move(v2)); - }; - op_a = swap(1, 0x55555555U); - op_a = swap(2, 0x33333333U); - op_a = swap(4, 0x0F0F0F0FU); - op_a = swap(8, 0x00FF00FFU); - op_a = swap(16, 0); - } - - const auto offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b, - Immediate(0), Immediate(8)); - const auto bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b, - Immediate(8), Immediate(8)); - auto result = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_a, offset, bits); - SetRegister(bb, instr.gpr0, std::move(result)); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp deleted file mode 100644 index 70d1c055b..000000000 --- a/src/video_core/shader/decode/bfi.cpp +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - const auto [packed_shift, base] = [&]() -> std::pair<Node, Node> { - switch (opcode->get().GetId()) { - case OpCode::Id::BFI_RC: - return {GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - case OpCode::Id::BFI_IMM_R: - return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; - default: - UNREACHABLE(); - return {Immediate(0), Immediate(0)}; - } - }(); - const Node insert = GetRegister(instr.gpr8); - const Node offset = BitfieldExtract(packed_shift, 0, 8); - const Node bits = BitfieldExtract(packed_shift, 8, 8); - - const Node value = - Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits); - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp deleted file mode 100644 index fea7a54df..000000000 --- a/src/video_core/shader/decode/conversion.cpp +++ /dev/null @@ -1,321 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <limits> -#include <optional> -#include <utility> - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Register; - -namespace { - -constexpr OperationCode GetFloatSelector(u64 selector) { - return selector == 0 ? 
OperationCode::FCastHalf0 : OperationCode::FCastHalf1; -} - -constexpr u32 SizeInBits(Register::Size size) { - switch (size) { - case Register::Size::Byte: - return 8; - case Register::Size::Short: - return 16; - case Register::Size::Word: - return 32; - case Register::Size::Long: - return 64; - } - return 0; -} - -constexpr std::optional<std::pair<s32, s32>> IntegerSaturateBounds(Register::Size src_size, - Register::Size dst_size, - bool src_signed, - bool dst_signed) { - const u32 dst_bits = SizeInBits(dst_size); - if (src_size == Register::Size::Word && dst_size == Register::Size::Word) { - if (src_signed == dst_signed) { - return std::nullopt; - } - return std::make_pair(0, std::numeric_limits<s32>::max()); - } - if (dst_signed) { - // Signed destination, clamp to [-128, 127] for instance - return std::make_pair(-(1 << (dst_bits - 1)), (1 << (dst_bits - 1)) - 1); - } else { - // Unsigned destination - if (dst_bits == 32) { - // Avoid shifting by 32, that is undefined behavior - return std::make_pair(0, s32(std::numeric_limits<u32>::max())); - } - return std::make_pair(0, (1 << dst_bits) - 1); - } -} - -} // Anonymous namespace - -u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - switch (opcode->get().GetId()) { - case OpCode::Id::I2I_R: - case OpCode::Id::I2I_C: - case OpCode::Id::I2I_IMM: { - const bool src_signed = instr.conversion.is_input_signed; - const bool dst_signed = instr.conversion.is_output_signed; - const Register::Size src_size = instr.conversion.src_size; - const Register::Size dst_size = instr.conversion.dst_size; - const u32 selector = static_cast<u32>(instr.conversion.int_src.selector); - - Node value = [this, instr, opcode] { - switch (opcode->get().GetId()) { - case OpCode::Id::I2I_R: - return GetRegister(instr.gpr20); - case OpCode::Id::I2I_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::I2I_IMM: - return Immediate(instr.alu.GetSignedImm20_20()); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - // Ensure the source selector is valid - switch (instr.conversion.src_size) { - case Register::Size::Byte: - break; - case Register::Size::Short: - ASSERT(selector == 0 || selector == 2); - break; - default: - ASSERT(selector == 0); - break; - } - - if (src_size != Register::Size::Word || selector != 0) { - value = SignedOperation(OperationCode::IBitfieldExtract, src_signed, std::move(value), - Immediate(selector * 8), Immediate(SizeInBits(src_size))); - } - - value = GetOperandAbsNegInteger(std::move(value), instr.conversion.abs_a, - instr.conversion.negate_a, src_signed); - - if (instr.alu.saturate_d) { - if (src_signed && !dst_signed) { - Node is_negative = Operation(OperationCode::LogicalUGreaterEqual, value, - Immediate(1 << (SizeInBits(src_size) - 1))); - value = Operation(OperationCode::Select, std::move(is_negative), Immediate(0), - std::move(value)); - - // Simplify generated expressions, this can be removed without semantic impact - SetTemporary(bb, 0, std::move(value)); - value = GetTemporary(0); - - if (dst_size != Register::Size::Word) { - const Node limit = Immediate((1 << SizeInBits(dst_size)) - 1); - Node is_large = - Operation(OperationCode::LogicalUGreaterThan, std::move(value), limit); - value = Operation(OperationCode::Select, std::move(is_large), limit, - std::move(value)); - } - } else if (const std::optional bounds = - IntegerSaturateBounds(src_size, dst_size, src_signed, dst_signed)) { - 
value = SignedOperation(OperationCode::IMax, src_signed, std::move(value), - Immediate(bounds->first)); - value = SignedOperation(OperationCode::IMin, src_signed, std::move(value), - Immediate(bounds->second)); - } - } else if (dst_size != Register::Size::Word) { - // No saturation, we only have to mask the result - Node mask = Immediate((1 << SizeInBits(dst_size)) - 1); - value = Operation(OperationCode::UBitwiseAnd, std::move(value), std::move(mask)); - } - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, std::move(value)); - break; - } - case OpCode::Id::I2F_R: - case OpCode::Id::I2F_C: - case OpCode::Id::I2F_IMM: { - UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in I2F is not implemented"); - - Node value = [&] { - switch (opcode->get().GetId()) { - case OpCode::Id::I2F_R: - return GetRegister(instr.gpr20); - case OpCode::Id::I2F_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::I2F_IMM: - return Immediate(instr.alu.GetSignedImm20_20()); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - const bool input_signed = instr.conversion.is_input_signed; - - if (const u32 offset = static_cast<u32>(instr.conversion.int_src.selector); offset > 0) { - ASSERT(instr.conversion.src_size == Register::Size::Byte || - instr.conversion.src_size == Register::Size::Short); - if (instr.conversion.src_size == Register::Size::Short) { - ASSERT(offset == 0 || offset == 2); - } - value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed, - std::move(value), Immediate(offset * 8)); - } - - value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); - value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed); - value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value); - value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - - if (instr.conversion.dst_size == Register::Size::Short) { - value = Operation(OperationCode::HCastFloat, PRECISE, value); - } - - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::F2F_R: - case OpCode::Id::F2F_C: - case OpCode::Id::F2F_IMM: { - UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); - UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in F2F is not implemented"); - - Node value = [&]() { - switch (opcode->get().GetId()) { - case OpCode::Id::F2F_R: - return GetRegister(instr.gpr20); - case OpCode::Id::F2F_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::F2F_IMM: - return GetImmediate19(instr); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - if (instr.conversion.src_size == Register::Size::Short) { - value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE, - std::move(value)); - } else { - ASSERT(instr.conversion.float_src.selector == 0); - } - - value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); - - value = [&] { - if (instr.conversion.src_size != instr.conversion.dst_size) { - // Rounding operations only matter when the source and destination conversion size - // is the same. 
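The I2I saturation path above clamps the converted value to the destination's representable range; IntegerSaturateBounds yields [-2^(n-1), 2^(n-1)-1] for signed destinations and [0, 2^n-1] for unsigned ones, special-casing the 32-bit unsigned destination to avoid a shift by 32. A plain-integer sketch of the same clamping (illustrative helper, not emulator API):

#include <algorithm>
#include <cstdint>
#include <limits>

// Clamp into the range of an n-bit destination, mirroring IntegerSaturateBounds.
int64_t SaturateToSize(int64_t value, uint32_t dst_bits, bool dst_signed) {
    int64_t lo, hi;
    if (dst_signed) {
        lo = -(int64_t{1} << (dst_bits - 1));    // e.g. -128 for 8 bits
        hi = (int64_t{1} << (dst_bits - 1)) - 1; // e.g.  127 for 8 bits
    } else {
        lo = 0;
        hi = dst_bits == 32 ? std::numeric_limits<uint32_t>::max()
                            : (int64_t{1} << dst_bits) - 1; // e.g. 255 for 8 bits
    }
    return std::clamp(value, lo, hi);
}
// SaturateToSize(300, 8, false) == 255; SaturateToSize(-5, 8, false) == 0.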
- return value; - } - switch (instr.conversion.f2f.GetRoundingMode()) { - case Tegra::Shader::F2fRoundingOp::None: - return value; - case Tegra::Shader::F2fRoundingOp::Round: - return Operation(OperationCode::FRoundEven, value); - case Tegra::Shader::F2fRoundingOp::Floor: - return Operation(OperationCode::FFloor, value); - case Tegra::Shader::F2fRoundingOp::Ceil: - return Operation(OperationCode::FCeil, value); - case Tegra::Shader::F2fRoundingOp::Trunc: - return Operation(OperationCode::FTrunc, value); - default: - UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", - instr.conversion.f2f.rounding.Value()); - return value; - } - }(); - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - - if (instr.conversion.dst_size == Register::Size::Short) { - value = Operation(OperationCode::HCastFloat, PRECISE, value); - } - - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::F2I_R: - case OpCode::Id::F2I_C: - case OpCode::Id::F2I_IMM: { - UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in F2I is not implemented"); - Node value = [&]() { - switch (opcode->get().GetId()) { - case OpCode::Id::F2I_R: - return GetRegister(instr.gpr20); - case OpCode::Id::F2I_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::F2I_IMM: - return GetImmediate19(instr); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - if (instr.conversion.src_size == Register::Size::Short) { - value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE, - std::move(value)); - } else { - ASSERT(instr.conversion.float_src.selector == 0); - } - - value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); - - value = [&]() { - switch (instr.conversion.f2i.rounding) { - case Tegra::Shader::F2iRoundingOp::RoundEven: - return Operation(OperationCode::FRoundEven, PRECISE, value); - case Tegra::Shader::F2iRoundingOp::Floor: - return Operation(OperationCode::FFloor, PRECISE, value); - case Tegra::Shader::F2iRoundingOp::Ceil: - return Operation(OperationCode::FCeil, PRECISE, value); - case Tegra::Shader::F2iRoundingOp::Trunc: - return Operation(OperationCode::FTrunc, PRECISE, value); - default: - UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}", - instr.conversion.f2i.rounding.Value()); - return Immediate(0); - } - }(); - const bool is_signed = instr.conversion.is_output_signed; - value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value); - value = ConvertIntegerSize(value, instr.conversion.dst_size, is_signed); - - SetRegister(bb, instr.gpr0, value); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp deleted file mode 100644 index 5973588d6..000000000 --- a/src/video_core/shader/decode/ffma.cpp +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
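Both F2F and F2I above pick an explicit rounding operation before converting. In host terms the mapping is roughly Round -> round to nearest even, and Floor/Ceil/Trunc -> their <cmath> namesakes. A hedged sketch; the enum is illustrative and does not mirror the hardware encoding, and RoundEven assumes the default FE_TONEAREST rounding mode:

#include <cmath>

enum class Rounding { None, Round, Floor, Ceil, Trunc }; // illustrative names only

float ApplyRounding(float value, Rounding mode) {
    switch (mode) {
    case Rounding::Round: return std::nearbyint(value); // round to nearest even
    case Rounding::Floor: return std::floor(value);
    case Rounding::Ceil:  return std::ceil(value);
    case Rounding::Trunc: return std::trunc(value);
    case Rounding::None:
    default:              return value;
    }
}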
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); - if (instr.ffma.tab5980_0 != 1) { - LOG_DEBUG(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value()); - } - if (instr.ffma.tab5980_1 != 0) { - LOG_DEBUG(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); - } - - const Node op_a = GetRegister(instr.gpr8); - - auto [op_b, op_c] = [&]() -> std::tuple<Node, Node> { - switch (opcode->get().GetId()) { - case OpCode::Id::FFMA_CR: { - return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), - GetRegister(instr.gpr39)}; - } - case OpCode::Id::FFMA_RR: - return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; - case OpCode::Id::FFMA_RC: { - return {GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - } - case OpCode::Id::FFMA_IMM: - return {GetImmediate19(instr), GetRegister(instr.gpr39)}; - default: - UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName()); - return {Immediate(0), Immediate(0)}; - } - }(); - - op_b = GetOperandAbsNegFloat(op_b, false, instr.ffma.negate_b); - op_c = GetOperandAbsNegFloat(op_c, false, instr.ffma.negate_c); - - Node value = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c); - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp deleted file mode 100644 index 5614e8a0d..000000000 --- a/src/video_core/shader/decode/float_set.cpp +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0, - instr.fset.neg_a != 0); - - Node op_b = [&]() { - if (instr.is_b_imm) { - return GetImmediate19(instr); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - op_b = GetOperandAbsNegFloat(op_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0); - - // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the - // condition is true, and to 0 otherwise. 
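That FSET result encoding reduces to a small helper: with the BF bit set the destination receives float 1.0/0.0, otherwise an integer all-ones/zero mask. A sketch of the register encoding only (the comparison and predicate combining are handled separately):

#include <cstdint>
#include <cstring>

// Encode FSET's boolean result into the destination register bits.
uint32_t EncodeSetResult(bool condition, bool bf) {
    if (bf) {
        const float f = condition ? 1.0f : 0.0f;
        uint32_t bits;
        std::memcpy(&bits, &f, sizeof(bits)); // reinterpret the float's bit pattern
        return bits;
    }
    return condition ? 0xFFFFFFFFu : 0u; // -1 as an unsigned mask, or 0
}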
- const Node second_pred = GetPredicate(instr.fset.pred39, instr.fset.neg_pred != 0); - - const OperationCode combiner = GetPredicateCombiner(instr.fset.op); - const Node first_pred = GetPredicateComparisonFloat(instr.fset.cond, op_a, op_b); - - const Node predicate = Operation(combiner, first_pred, second_pred); - - const Node true_value = instr.fset.bf ? Immediate(1.0f) : Immediate(-1); - const Node false_value = instr.fset.bf ? Immediate(0.0f) : Immediate(0); - const Node value = - Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); - - if (instr.fset.bf) { - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - } else { - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - } - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp deleted file mode 100644 index 200c2c983..000000000 --- a/src/video_core/shader/decode/float_set_predicate.cpp +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; - -u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0, - instr.fsetp.neg_a != 0); - Node op_b = [&]() { - if (instr.is_b_imm) { - return GetImmediate19(instr); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - op_b = GetOperandAbsNegFloat(std::move(op_b), instr.fsetp.abs_b, instr.fsetp.neg_b); - - // We can't use the constant predicate as destination. - ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); - - const Node predicate = - GetPredicateComparisonFloat(instr.fsetp.cond, std::move(op_a), std::move(op_b)); - const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0); - - const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op); - const Node value = Operation(combiner, predicate, second_pred); - - // Set the primary predicate to the result of Predicate OP SecondPredicate - SetPredicate(bb, instr.fsetp.pred3, value); - - if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, - // if enabled - const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate); - const Node second_value = Operation(combiner, negated_pred, second_pred); - SetPredicate(bb, instr.fsetp.pred0, second_value); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp deleted file mode 100644 index fa83108cd..000000000 --- a/src/video_core/shader/decode/half_set.cpp +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
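FSETP above writes the comparison, combined with the auxiliary predicate, into the primary predicate and optionally the negated comparison (combined the same way) into a secondary one. With the combiner shown as logical AND purely for illustration:

#include <utility>

// FSETP-style predicate write-back: {primary, optional secondary}.
std::pair<bool, bool> SetPredicatePair(bool comparison, bool second_pred) {
    return {comparison && second_pred, !comparison && second_pred};
}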
- -#include <array> - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::PredCondition; - -u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - PredCondition cond{}; - bool bf = false; - bool ftz = false; - bool neg_a = false; - bool abs_a = false; - bool neg_b = false; - bool abs_b = false; - switch (opcode->get().GetId()) { - case OpCode::Id::HSET2_C: - case OpCode::Id::HSET2_IMM: - cond = instr.hsetp2.cbuf_and_imm.cond; - bf = instr.Bit(53); - ftz = instr.Bit(54); - neg_a = instr.Bit(43); - abs_a = instr.Bit(44); - neg_b = instr.Bit(56); - abs_b = instr.Bit(54); - break; - case OpCode::Id::HSET2_R: - cond = instr.hsetp2.reg.cond; - bf = instr.Bit(49); - ftz = instr.Bit(50); - neg_a = instr.Bit(43); - abs_a = instr.Bit(44); - neg_b = instr.Bit(31); - abs_b = instr.Bit(30); - break; - default: - UNREACHABLE(); - } - - Node op_b = [this, instr, opcode] { - switch (opcode->get().GetId()) { - case OpCode::Id::HSET2_C: - // Inform as unimplemented as this is not tested. - UNIMPLEMENTED_MSG("HSET2_C is not implemented"); - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::HSET2_R: - return GetRegister(instr.gpr20); - case OpCode::Id::HSET2_IMM: - return UnpackHalfImmediate(instr, true); - default: - UNREACHABLE(); - return Node{}; - } - }(); - - if (!ftz) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - - Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); - op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a); - - switch (opcode->get().GetId()) { - case OpCode::Id::HSET2_R: - op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b); - [[fallthrough]]; - case OpCode::Id::HSET2_C: - op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b); - break; - default: - break; - } - - Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); - - Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b); - - const OperationCode combiner = GetPredicateCombiner(instr.hset2.op); - - // HSET2 operates on each half float in the pack. - std::array<Node, 2> values; - for (u32 i = 0; i < 2; ++i) { - const u32 raw_value = bf ? 0x3c00 : 0xffff; - Node true_value = Immediate(raw_value << (i * 16)); - Node false_value = Immediate(0); - - Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i)); - Node predicate = Operation(combiner, comparison, second_pred); - values[i] = - Operation(OperationCode::Select, predicate, move(true_value), move(false_value)); - } - - Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]); - SetRegister(bb, instr.gpr0, move(value)); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp deleted file mode 100644 index 310655619..000000000 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
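HSET2 above evaluates the condition per 16-bit lane and ORs per-lane masks into the 32-bit destination; 0x3c00 is the half-precision encoding of 1.0, so the BF form produces packed half 1.0/0.0 and the non-BF form 0xffff/0 per lane. The packing step in isolation, taking lane results that are already combined with the auxiliary predicate:

#include <cstdint>

// Build HSET2's packed result from the two per-lane boolean results.
uint32_t PackHset2Result(bool lane0, bool lane1, bool bf) {
    const uint32_t true_bits = bf ? 0x3c00u : 0xffffu;
    uint32_t result = 0;
    if (lane0) result |= true_bits;       // low 16 bits
    if (lane1) result |= true_bits << 16; // high 16 bits
    return result;
}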
- -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; - -u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - if (instr.hsetp2.ftz != 0) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - - Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); - op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); - - Tegra::Shader::PredCondition cond{}; - bool h_and{}; - Node op_b{}; - switch (opcode->get().GetId()) { - case OpCode::Id::HSETP2_C: - cond = instr.hsetp2.cbuf_and_imm.cond; - h_and = instr.hsetp2.cbuf_and_imm.h_and; - op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), - instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b); - // F32 is hardcoded in hardware - op_b = UnpackHalfFloat(std::move(op_b), Tegra::Shader::HalfType::F32); - break; - case OpCode::Id::HSETP2_IMM: - cond = instr.hsetp2.cbuf_and_imm.cond; - h_and = instr.hsetp2.cbuf_and_imm.h_and; - op_b = UnpackHalfImmediate(instr, true); - break; - case OpCode::Id::HSETP2_R: - cond = instr.hsetp2.reg.cond; - h_and = instr.hsetp2.reg.h_and; - op_b = - GetOperandAbsNegHalf(UnpackHalfFloat(GetRegister(instr.gpr20), instr.hsetp2.reg.type_b), - instr.hsetp2.reg.abs_b, instr.hsetp2.reg.negate_b); - break; - default: - UNREACHABLE(); - op_b = Immediate(0); - } - - const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); - const Node combined_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred); - - const auto Write = [&](u64 dest, Node src) { - SetPredicate(bb, dest, Operation(combiner, std::move(src), combined_pred)); - }; - - const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b); - const u64 first = instr.hsetp2.pred3; - const u64 second = instr.hsetp2.pred0; - if (h_and) { - Node joined = Operation(OperationCode::LogicalAnd2, comparison); - Write(first, joined); - Write(second, Operation(OperationCode::LogicalNegate, std::move(joined))); - } else { - Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0U))); - Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1U))); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp deleted file mode 100644 index 5b44cb79c..000000000 --- a/src/video_core/shader/decode/hfma2.cpp +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
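HSETP2's write-back above depends on the H_AND bit: when it is set, both lanes are ANDed into a single result whose negation feeds the second predicate; otherwise each destination predicate receives one lane. A small sketch with the combiner again shown as AND:

#include <utility>

// {first predicate, second predicate} for HSETP2 given per-lane comparisons.
std::pair<bool, bool> Hsetp2Predicates(bool lane0, bool lane1, bool second_pred, bool h_and) {
    if (h_and) {
        const bool joined = lane0 && lane1;
        return {joined && second_pred, !joined && second_pred};
    }
    return {lane0 && second_pred, lane1 && second_pred};
}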
- -#include <tuple> - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::HalfPrecision; -using Tegra::Shader::HalfType; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { - DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None); - } else { - DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None); - } - - constexpr auto identity = HalfType::H0_H1; - bool neg_b{}, neg_c{}; - auto [saturate, type_b, op_b, type_c, - op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> { - switch (opcode->get().GetId()) { - case OpCode::Id::HFMA2_CR: - neg_b = instr.hfma2.negate_b; - neg_c = instr.hfma2.negate_c; - return {instr.hfma2.saturate, HalfType::F32, - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), - instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; - case OpCode::Id::HFMA2_RC: - neg_b = instr.hfma2.negate_b; - neg_c = instr.hfma2.negate_c; - return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39), - HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - case OpCode::Id::HFMA2_RR: - neg_b = instr.hfma2.rr.negate_b; - neg_c = instr.hfma2.rr.negate_c; - return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20), - instr.hfma2.rr.type_c, GetRegister(instr.gpr39)}; - case OpCode::Id::HFMA2_IMM_R: - neg_c = instr.hfma2.negate_c; - return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true), - instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; - default: - return {false, identity, Immediate(0), identity, Immediate(0)}; - } - }(); - - const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a); - op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b); - op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c); - - Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c); - value = GetSaturatedHalfFloat(value, saturate); - value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge); - - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp deleted file mode 100644 index 5470e8cf4..000000000 --- a/src/video_core/shader/decode/image.cpp +++ /dev/null @@ -1,536 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include <algorithm> -#include <vector> -#include <fmt/format.h> - -#include "common/assert.h" -#include "common/bit_field.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" -#include "video_core/textures/texture.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::PredCondition; -using Tegra::Shader::StoreType; -using Tegra::Texture::ComponentType; -using Tegra::Texture::TextureFormat; -using Tegra::Texture::TICEntry; - -namespace { - -ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, - std::size_t component) { - const TextureFormat format{descriptor.format}; - switch (format) { - case TextureFormat::R16G16B16A16: - case TextureFormat::R32G32B32A32: - case TextureFormat::R32G32B32: - case TextureFormat::R32G32: - case TextureFormat::R16G16: - case TextureFormat::R32: - case TextureFormat::R16: - case TextureFormat::R8: - case TextureFormat::R1: - if (component == 0) { - return descriptor.r_type; - } - if (component == 1) { - return descriptor.g_type; - } - if (component == 2) { - return descriptor.b_type; - } - if (component == 3) { - return descriptor.a_type; - } - break; - case TextureFormat::A8R8G8B8: - if (component == 0) { - return descriptor.a_type; - } - if (component == 1) { - return descriptor.r_type; - } - if (component == 2) { - return descriptor.g_type; - } - if (component == 3) { - return descriptor.b_type; - } - break; - case TextureFormat::A2B10G10R10: - case TextureFormat::A4B4G4R4: - case TextureFormat::A5B5G5R1: - case TextureFormat::A1B5G5R5: - if (component == 0) { - return descriptor.a_type; - } - if (component == 1) { - return descriptor.b_type; - } - if (component == 2) { - return descriptor.g_type; - } - if (component == 3) { - return descriptor.r_type; - } - break; - case TextureFormat::R32_B24G8: - if (component == 0) { - return descriptor.r_type; - } - if (component == 1) { - return descriptor.b_type; - } - if (component == 2) { - return descriptor.g_type; - } - break; - case TextureFormat::B5G6R5: - case TextureFormat::B6G5R5: - case TextureFormat::B10G11R11: - if (component == 0) { - return descriptor.b_type; - } - if (component == 1) { - return descriptor.g_type; - } - if (component == 2) { - return descriptor.r_type; - } - break; - case TextureFormat::R24G8: - case TextureFormat::R8G24: - case TextureFormat::R8G8: - case TextureFormat::G4R4: - if (component == 0) { - return descriptor.g_type; - } - if (component == 1) { - return descriptor.r_type; - } - break; - default: - break; - } - UNIMPLEMENTED_MSG("Texture format not implemented={}", format); - return ComponentType::FLOAT; -} - -bool IsComponentEnabled(std::size_t component_mask, std::size_t component) { - constexpr u8 R = 0b0001; - constexpr u8 G = 0b0010; - constexpr u8 B = 0b0100; - constexpr u8 A = 0b1000; - constexpr std::array<u8, 16> mask = { - 0, (R), (G), (R | G), (B), (R | B), (G | B), (R | G | B), - (A), (R | A), (G | A), (R | G | A), (B | A), (R | B | A), (G | B | A), (R | G | B | A)}; - return std::bitset<4>{mask.at(component_mask)}.test(component); -} - -u32 GetComponentSize(TextureFormat format, std::size_t component) { - switch (format) { - case TextureFormat::R32G32B32A32: - return 32; - case TextureFormat::R16G16B16A16: - return 16; - case TextureFormat::R32G32B32: - return component <= 2 ? 
32 : 0; - case TextureFormat::R32G32: - return component <= 1 ? 32 : 0; - case TextureFormat::R16G16: - return component <= 1 ? 16 : 0; - case TextureFormat::R32: - return component == 0 ? 32 : 0; - case TextureFormat::R16: - return component == 0 ? 16 : 0; - case TextureFormat::R8: - return component == 0 ? 8 : 0; - case TextureFormat::R1: - return component == 0 ? 1 : 0; - case TextureFormat::A8R8G8B8: - return 8; - case TextureFormat::A2B10G10R10: - return (component == 3 || component == 2 || component == 1) ? 10 : 2; - case TextureFormat::A4B4G4R4: - return 4; - case TextureFormat::A5B5G5R1: - return (component == 0 || component == 1 || component == 2) ? 5 : 1; - case TextureFormat::A1B5G5R5: - return (component == 1 || component == 2 || component == 3) ? 5 : 1; - case TextureFormat::R32_B24G8: - if (component == 0) { - return 32; - } - if (component == 1) { - return 24; - } - if (component == 2) { - return 8; - } - return 0; - case TextureFormat::B5G6R5: - if (component == 0 || component == 2) { - return 5; - } - if (component == 1) { - return 6; - } - return 0; - case TextureFormat::B6G5R5: - if (component == 1 || component == 2) { - return 5; - } - if (component == 0) { - return 6; - } - return 0; - case TextureFormat::B10G11R11: - if (component == 1 || component == 2) { - return 11; - } - if (component == 0) { - return 10; - } - return 0; - case TextureFormat::R24G8: - if (component == 0) { - return 8; - } - if (component == 1) { - return 24; - } - return 0; - case TextureFormat::R8G24: - if (component == 0) { - return 24; - } - if (component == 1) { - return 8; - } - return 0; - case TextureFormat::R8G8: - return (component == 0 || component == 1) ? 8 : 0; - case TextureFormat::G4R4: - return (component == 0 || component == 1) ? 4 : 0; - default: - UNIMPLEMENTED_MSG("Texture format not implemented={}", format); - return 0; - } -} - -std::size_t GetImageComponentMask(TextureFormat format) { - constexpr u8 R = 0b0001; - constexpr u8 G = 0b0010; - constexpr u8 B = 0b0100; - constexpr u8 A = 0b1000; - switch (format) { - case TextureFormat::R32G32B32A32: - case TextureFormat::R16G16B16A16: - case TextureFormat::A8R8G8B8: - case TextureFormat::A2B10G10R10: - case TextureFormat::A4B4G4R4: - case TextureFormat::A5B5G5R1: - case TextureFormat::A1B5G5R5: - return std::size_t{R | G | B | A}; - case TextureFormat::R32G32B32: - case TextureFormat::R32_B24G8: - case TextureFormat::B5G6R5: - case TextureFormat::B6G5R5: - case TextureFormat::B10G11R11: - return std::size_t{R | G | B}; - case TextureFormat::R32G32: - case TextureFormat::R16G16: - case TextureFormat::R24G8: - case TextureFormat::R8G24: - case TextureFormat::R8G8: - case TextureFormat::G4R4: - return std::size_t{R | G}; - case TextureFormat::R32: - case TextureFormat::R16: - case TextureFormat::R8: - case TextureFormat::R1: - return std::size_t{R}; - default: - UNIMPLEMENTED_MSG("Texture format not implemented={}", format); - return std::size_t{R | G | B | A}; - } -} - -std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { - switch (image_type) { - case Tegra::Shader::ImageType::Texture1D: - case Tegra::Shader::ImageType::TextureBuffer: - return 1; - case Tegra::Shader::ImageType::Texture1DArray: - case Tegra::Shader::ImageType::Texture2D: - return 2; - case Tegra::Shader::ImageType::Texture2DArray: - case Tegra::Shader::ImageType::Texture3D: - return 3; - } - UNREACHABLE(); - return 1; -} -} // Anonymous namespace - -std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, u32 
component_size, - Node original_value) { - switch (component_type) { - case ComponentType::SNORM: { - // range [-1.0, 1.0] - auto cnv_value = Operation(OperationCode::FMul, original_value, - Immediate(static_cast<float>(1 << component_size) / 2.f - 1.f)); - cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value)); - return {BitfieldExtract(std::move(cnv_value), 0, component_size), true}; - } - case ComponentType::SINT: - case ComponentType::UNORM: { - bool is_signed = component_type == ComponentType::SINT; - // range [0.0, 1.0] - auto cnv_value = Operation(OperationCode::FMul, original_value, - Immediate(static_cast<float>(1 << component_size) - 1.f)); - return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)), - is_signed}; - } - case ComponentType::UINT: // range [0, (1 << component_size) - 1] - return {std::move(original_value), false}; - case ComponentType::FLOAT: - if (component_size == 16) { - return {Operation(OperationCode::HCastFloat, original_value), true}; - } else { - return {std::move(original_value), true}; - } - default: - UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type); - return {std::move(original_value), true}; - } -} - -u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - const auto GetCoordinates = [this, instr](Tegra::Shader::ImageType image_type) { - std::vector<Node> coords; - const std::size_t num_coords{GetImageTypeNumCoordinates(image_type)}; - coords.reserve(num_coords); - for (std::size_t i = 0; i < num_coords; ++i) { - coords.push_back(GetRegister(instr.gpr8.Value() + i)); - } - return coords; - }; - - switch (opcode->get().GetId()) { - case OpCode::Id::SULD: { - UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != - Tegra::Shader::OutOfBoundsStore::Ignore); - - const auto type{instr.suldst.image_type}; - auto& image{instr.suldst.is_immediate ? 
GetImage(instr.image, type) - : GetBindlessImage(instr.gpr39, type)}; - image.MarkRead(); - - if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) { - u32 indexer = 0; - for (u32 element = 0; element < 4; ++element) { - if (!instr.suldst.IsComponentEnabled(element)) { - continue; - } - MetaImage meta{image, {}, element}; - Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)); - SetTemporary(bb, indexer++, std::move(value)); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) { - UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 && - instr.suldst.GetStoreDataLayout() != StoreType::Bits64); - - auto descriptor = [this, instr] { - std::optional<Tegra::Engines::SamplerDescriptor> sampler_descriptor; - if (instr.suldst.is_immediate) { - sampler_descriptor = - registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value())); - } else { - const Node image_register = GetRegister(instr.gpr39); - const auto result = TrackCbuf(image_register, global_code, - static_cast<s64>(global_code.size())); - const auto buffer = std::get<1>(result); - const auto offset = std::get<2>(result); - sampler_descriptor = registry.ObtainBindlessSampler(buffer, offset); - } - if (!sampler_descriptor) { - UNREACHABLE_MSG("Failed to obtain image descriptor"); - } - return *sampler_descriptor; - }(); - - const auto comp_mask = GetImageComponentMask(descriptor.format); - - switch (instr.suldst.GetStoreDataLayout()) { - case StoreType::Bits32: - case StoreType::Bits64: { - u32 indexer = 0; - u32 shifted_counter = 0; - Node value = Immediate(0); - for (u32 element = 0; element < 4; ++element) { - if (!IsComponentEnabled(comp_mask, element)) { - continue; - } - const auto component_type = GetComponentType(descriptor, element); - const auto component_size = GetComponentSize(descriptor.format, element); - MetaImage meta{image, {}, element}; - - auto [converted_value, is_signed] = GetComponentValue( - component_type, component_size, - Operation(OperationCode::ImageLoad, meta, GetCoordinates(type))); - - // shift element to correct position - const auto shifted = shifted_counter; - if (shifted > 0) { - converted_value = - SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, - std::move(converted_value), Immediate(shifted)); - } - shifted_counter += component_size; - - // add value into result - value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value)); - - // if we shifted enough for 1 byte -> we save it into temp - if (shifted_counter >= 32) { - SetTemporary(bb, indexer++, std::move(value)); - // reset counter and value to prepare pack next byte - value = Immediate(0); - shifted_counter = 0; - } - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - default: - UNREACHABLE(); - break; - } - } - break; - } - case OpCode::Id::SUST: { - UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P); - UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != - Tegra::Shader::OutOfBoundsStore::Ignore); - UNIMPLEMENTED_IF(instr.suldst.component_mask_selector != 0xf); // Ensure we have RGBA - - std::vector<Node> values; - constexpr std::size_t hardcoded_size{4}; - for (std::size_t i = 0; i < hardcoded_size; ++i) { - values.push_back(GetRegister(instr.gpr0.Value() + i)); - } - - const auto type{instr.suldst.image_type}; - auto& 
image{instr.suldst.is_immediate ? GetImage(instr.image, type) - : GetBindlessImage(instr.gpr39, type)}; - image.MarkWrite(); - - MetaImage meta{image, std::move(values)}; - bb.push_back(Operation(OperationCode::ImageStore, meta, GetCoordinates(type))); - break; - } - case OpCode::Id::SUATOM: { - UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0); - - const OperationCode operation_code = [instr] { - switch (instr.suatom_d.operation_type) { - case Tegra::Shader::ImageAtomicOperationType::S32: - case Tegra::Shader::ImageAtomicOperationType::U32: - switch (instr.suatom_d.operation) { - case Tegra::Shader::ImageAtomicOperation::Add: - return OperationCode::AtomicImageAdd; - case Tegra::Shader::ImageAtomicOperation::And: - return OperationCode::AtomicImageAnd; - case Tegra::Shader::ImageAtomicOperation::Or: - return OperationCode::AtomicImageOr; - case Tegra::Shader::ImageAtomicOperation::Xor: - return OperationCode::AtomicImageXor; - case Tegra::Shader::ImageAtomicOperation::Exch: - return OperationCode::AtomicImageExchange; - default: - break; - } - break; - default: - break; - } - UNIMPLEMENTED_MSG("Unimplemented operation={}, type={}", - static_cast<u64>(instr.suatom_d.operation.Value()), - static_cast<u64>(instr.suatom_d.operation_type.Value())); - return OperationCode::AtomicImageAdd; - }(); - - Node value = GetRegister(instr.gpr0); - - const auto type = instr.suatom_d.image_type; - auto& image = GetImage(instr.image, type); - image.MarkAtomic(); - - MetaImage meta{image, {std::move(value)}}; - SetRegister(bb, instr.gpr0, Operation(operation_code, meta, GetCoordinates(type))); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -ImageEntry& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { - const auto offset = static_cast<u32>(image.index.Value()); - - const auto it = - std::find_if(std::begin(used_images), std::end(used_images), - [offset](const ImageEntry& entry) { return entry.offset == offset; }); - if (it != std::end(used_images)) { - ASSERT(!it->is_bindless && it->type == type); - return *it; - } - - const auto next_index = static_cast<u32>(used_images.size()); - return used_images.emplace_back(next_index, offset, type); -} - -ImageEntry& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { - const Node image_register = GetRegister(reg); - const auto result = - TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size())); - - const auto buffer = std::get<1>(result); - const auto offset = std::get<2>(result); - - const auto it = std::find_if(std::begin(used_images), std::end(used_images), - [buffer, offset](const ImageEntry& entry) { - return entry.buffer == buffer && entry.offset == offset; - }); - if (it != std::end(used_images)) { - ASSERT(it->is_bindless && it->type == type); - return *it; - } - - const auto next_index = static_cast<u32>(used_images.size()); - return used_images.emplace_back(next_index, offset, buffer, type); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp deleted file mode 100644 index 59809bcd8..000000000 --- a/src/video_core/shader/decode/integer_set.cpp +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
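The SULD .D_BA path in the deleted image.cpp above rebuilds raw texel words: each enabled component is converted back to its integer encoding (GetComponentValue), shifted to its bit position, ORed into an accumulator, and the accumulator is flushed to a temporary register every time 32 bits have been filled. The packing loop in plain C++ (hypothetical helper; the per-component conversion is omitted):

#include <cstdint>
#include <vector>

// Pack per-component raw values into 32-bit words, mirroring the SULD .D accumulation.
// 'sizes' holds each enabled component's bit width, e.g. {5, 6, 5} for B5G6R5.
std::vector<uint32_t> PackComponents(const std::vector<uint32_t>& raw_values,
                                     const std::vector<uint32_t>& sizes) {
    std::vector<uint32_t> words;
    uint32_t accum = 0;
    uint32_t shift = 0;
    for (std::size_t i = 0; i < raw_values.size(); ++i) {
        accum |= raw_values[i] << shift; // place component at its bit offset
        shift += sizes[i];
        if (shift >= 32) {               // a full 32-bit word has been assembled
            words.push_back(accum);
            accum = 0;
            shift = 0;
        }
    }
    return words;
}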
- -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - const Node op_a = GetRegister(instr.gpr8); - const Node op_b = [&]() { - if (instr.is_b_imm) { - return Immediate(instr.alu.GetSignedImm20_20()); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the condition - // is true, and to 0 otherwise. - const Node second_pred = GetPredicate(instr.iset.pred39, instr.iset.neg_pred != 0); - const Node first_pred = - GetPredicateComparisonInteger(instr.iset.cond, instr.iset.is_signed, op_a, op_b); - - const OperationCode combiner = GetPredicateCombiner(instr.iset.op); - - const Node predicate = Operation(combiner, first_pred, second_pred); - - const Node true_value = instr.iset.bf ? Immediate(1.0f) : Immediate(-1); - const Node false_value = instr.iset.bf ? Immediate(0.0f) : Immediate(0); - const Node value = - Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); - - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp deleted file mode 100644 index 25e48fef8..000000000 --- a/src/video_core/shader/decode/integer_set_predicate.cpp +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; - -u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - const Node op_a = GetRegister(instr.gpr8); - - const Node op_b = [&]() { - if (instr.is_b_imm) { - return Immediate(instr.alu.GetSignedImm20_20()); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - // We can't use the constant predicate as destination. 
- ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); - - const Node second_pred = GetPredicate(instr.isetp.pred39, instr.isetp.neg_pred != 0); - const Node predicate = - GetPredicateComparisonInteger(instr.isetp.cond, instr.isetp.is_signed, op_a, op_b); - - // Set the primary predicate to the result of Predicate OP SecondPredicate - const OperationCode combiner = GetPredicateCombiner(instr.isetp.op); - const Node value = Operation(combiner, predicate, second_pred); - SetPredicate(bb, instr.isetp.pred3, value); - - if (instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled - const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate); - SetPredicate(bb, instr.isetp.pred0, Operation(combiner, negated_pred, second_pred)); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp deleted file mode 100644 index 7728f600e..000000000 --- a/src/video_core/shader/decode/memory.cpp +++ /dev/null @@ -1,493 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <utility> -#include <vector> - -#include <fmt/format.h> - -#include "common/alignment.h" -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::AtomicOp; -using Tegra::Shader::AtomicType; -using Tegra::Shader::Attribute; -using Tegra::Shader::GlobalAtomicType; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Register; -using Tegra::Shader::StoreType; - -namespace { - -OperationCode GetAtomOperation(AtomicOp op) { - switch (op) { - case AtomicOp::Add: - return OperationCode::AtomicIAdd; - case AtomicOp::Min: - return OperationCode::AtomicIMin; - case AtomicOp::Max: - return OperationCode::AtomicIMax; - case AtomicOp::And: - return OperationCode::AtomicIAnd; - case AtomicOp::Or: - return OperationCode::AtomicIOr; - case AtomicOp::Xor: - return OperationCode::AtomicIXor; - case AtomicOp::Exch: - return OperationCode::AtomicIExchange; - default: - UNIMPLEMENTED_MSG("op={}", op); - return OperationCode::AtomicIAdd; - } -} - -bool IsUnaligned(Tegra::Shader::UniformType uniform_type) { - return uniform_type == Tegra::Shader::UniformType::UnsignedByte || - uniform_type == Tegra::Shader::UniformType::UnsignedShort; -} - -u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) { - switch (uniform_type) { - case Tegra::Shader::UniformType::UnsignedByte: - return 0b11; - case Tegra::Shader::UniformType::UnsignedShort: - return 0b10; - default: - UNREACHABLE(); - return 0; - } -} - -u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) { - switch (uniform_type) { - case Tegra::Shader::UniformType::UnsignedByte: - return 8; - case Tegra::Shader::UniformType::UnsignedShort: - return 16; - case Tegra::Shader::UniformType::Single: - return 32; - case Tegra::Shader::UniformType::Double: - return 64; - case Tegra::Shader::UniformType::Quad: - case Tegra::Shader::UniformType::UnsignedQuad: - return 128; - default: - UNIMPLEMENTED_MSG("Unimplemented size={}!", uniform_type); - return 32; - } -} - -Node ExtractUnaligned(Node value, 
Node address, u32 mask, u32 size) { - Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask)); - offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3)); - return Operation(OperationCode::UBitfieldExtract, move(value), move(offset), Immediate(size)); -} - -Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) { - Node offset = Operation(OperationCode::UBitwiseAnd, move(address), Immediate(mask)); - offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3)); - return Operation(OperationCode::UBitfieldInsert, move(dest), move(value), move(offset), - Immediate(size)); -} - -Node Sign16Extend(Node value) { - Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15)); - Node is_sign = Operation(OperationCode::LogicalUEqual, move(sign), Immediate(1U << 15)); - Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0)); - return Operation(OperationCode::UBitwiseOr, move(value), move(extend)); -} - -} // Anonymous namespace - -u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - switch (opcode->get().GetId()) { - case OpCode::Id::LD_A: { - // Note: Shouldn't this be interp mode flat? As in no interpolation made. - UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, - "Indirect attribute loads are not supported"); - UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, - "Unaligned attribute loads are not supported"); - UNIMPLEMENTED_IF_MSG(instr.attribute.fmt20.IsPhysical() && - instr.attribute.fmt20.size != Tegra::Shader::AttributeSize::Word, - "Non-32 bits PHYS reads are not implemented"); - - const Node buffer{GetRegister(instr.gpr39)}; - - u64 next_element = instr.attribute.fmt20.element; - auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value()); - - const auto LoadNextElement = [&](u32 reg_offset) { - const Node attribute{instr.attribute.fmt20.IsPhysical() - ? GetPhysicalInputAttribute(instr.gpr8, buffer) - : GetInputAttribute(static_cast<Attribute::Index>(next_index), - next_element, buffer)}; - - SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute); - - // Load the next attribute element into the following register. If the element - // to load goes beyond the vec4 size, load the first element of the next - // attribute. - next_element = (next_element + 1) % 4; - next_index = next_index + (next_element == 0 ? 
1 : 0); - }; - - const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1; - for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { - LoadNextElement(reg_offset); - } - break; - } - case OpCode::Id::LD_C: { - UNIMPLEMENTED_IF(instr.ld_c.unknown != 0); - - Node index = GetRegister(instr.gpr8); - - const Node op_a = - GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); - - switch (instr.ld_c.type.Value()) { - case Tegra::Shader::UniformType::Single: - SetRegister(bb, instr.gpr0, op_a); - break; - - case Tegra::Shader::UniformType::Double: { - const Node op_b = - GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index); - - SetTemporary(bb, 0, op_a); - SetTemporary(bb, 1, op_b); - SetRegister(bb, instr.gpr0, GetTemporary(0)); - SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1)); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled type: {}", instr.ld_c.type.Value()); - } - break; - } - case OpCode::Id::LD_L: - LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", instr.ld_l.unknown); - [[fallthrough]]; - case OpCode::Id::LD_S: { - const auto GetAddress = [&](s32 offset) { - ASSERT(offset % 4 == 0); - const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset); - return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset); - }; - const auto GetMemory = [&](s32 offset) { - return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset)) - : GetLocalMemory(GetAddress(offset)); - }; - - switch (instr.ldst_sl.type.Value()) { - case StoreType::Signed16: - SetRegister(bb, instr.gpr0, - Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16))); - break; - case StoreType::Bits32: - case StoreType::Bits64: - case StoreType::Bits128: { - const u32 count = [&] { - switch (instr.ldst_sl.type.Value()) { - case StoreType::Bits32: - return 1; - case StoreType::Bits64: - return 2; - case StoreType::Bits128: - return 4; - default: - UNREACHABLE(); - return 0; - } - }(); - for (u32 i = 0; i < count; ++i) { - SetTemporary(bb, i, GetMemory(i * 4)); - } - for (u32 i = 0; i < count; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - default: - UNIMPLEMENTED_MSG("{} Unhandled type: {}", opcode->get().GetName(), - instr.ldst_sl.type.Value()); - } - break; - } - case OpCode::Id::LD: - case OpCode::Id::LDG: { - const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType { - switch (opcode->get().GetId()) { - case OpCode::Id::LD: - UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended LD is not implemented"); - return instr.generic.type; - case OpCode::Id::LDG: - return instr.ldg.type; - default: - UNREACHABLE(); - return {}; - } - }(); - - const auto [real_address_base, base_address, descriptor] = - TrackGlobalMemory(bb, instr, true, false); - - const u32 size = GetMemorySize(type); - const u32 count = Common::AlignUp(size, 32) / 32; - if (!real_address_base || !base_address) { - // Tracking failed, load zeroes. - for (u32 i = 0; i < count; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, Immediate(0.0f)); - } - break; - } - - for (u32 i = 0; i < count; ++i) { - const Node it_offset = Immediate(i * 4); - const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); - Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); - - // To handle unaligned loads get the bytes used to dereference global memory and extract - // those bytes from the loaded u32. 
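In plain integer terms, that extraction (and the 16-bit sign extension LD_S/LD_L apply through Sign16Extend) looks like the following; size is 8 or 16 here, never 32, and mask is 0b11 for bytes and 0b10 for half-words:

#include <cstdint>

// Extract an unaligned 8/16-bit value from the aligned 32-bit word it lives in.
uint32_t ExtractUnaligned(uint32_t word, uint32_t address, uint32_t mask, uint32_t size) {
    const uint32_t bit_offset = (address & mask) * 8;  // byte offset -> bit offset
    return (word >> bit_offset) & ((1u << size) - 1u); // pull out 'size' bits
}

// Sign-extend a 16-bit value held in the low bits of a 32-bit register.
uint32_t Sign16Extend(uint32_t value) {
    return (value & 0x8000u) ? (value | 0xFFFF0000u) : value;
}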
- if (IsUnaligned(type)) { - gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size); - } - - SetTemporary(bb, i, gmem); - } - - for (u32 i = 0; i < count; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - case OpCode::Id::ST_A: { - UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, - "Indirect attribute loads are not supported"); - UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, - "Unaligned attribute loads are not supported"); - - u64 element = instr.attribute.fmt20.element; - auto index = static_cast<u64>(instr.attribute.fmt20.index.Value()); - - const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1; - for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { - Node dest; - if (instr.attribute.fmt20.patch) { - const u32 offset = static_cast<u32>(index) * 4 + static_cast<u32>(element); - dest = MakeNode<PatchNode>(offset); - } else { - dest = GetOutputAttribute(static_cast<Attribute::Index>(index), element, - GetRegister(instr.gpr39)); - } - const auto src = GetRegister(instr.gpr0.Value() + reg_offset); - - bb.push_back(Operation(OperationCode::Assign, dest, src)); - - // Load the next attribute element into the following register. If the element to load - // goes beyond the vec4 size, load the first element of the next attribute. - element = (element + 1) % 4; - index = index + (element == 0 ? 1 : 0); - } - break; - } - case OpCode::Id::ST_L: - LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", instr.st_l.cache_management.Value()); - [[fallthrough]]; - case OpCode::Id::ST_S: { - const auto GetAddress = [&](s32 offset) { - ASSERT(offset % 4 == 0); - const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset); - return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate); - }; - - const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L; - const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory; - const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory; - - switch (instr.ldst_sl.type.Value()) { - case StoreType::Bits128: - (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3)); - (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2)); - [[fallthrough]]; - case StoreType::Bits64: - (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1)); - [[fallthrough]]; - case StoreType::Bits32: - (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0)); - break; - case StoreType::Unsigned16: - case StoreType::Signed16: { - Node address = GetAddress(0); - Node memory = (this->*get_memory)(address); - (this->*set_memory)( - bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16)); - break; - } - default: - UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(), - instr.ldst_sl.type.Value()); - } - break; - } - case OpCode::Id::ST: - case OpCode::Id::STG: { - const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType { - switch (opcode->get().GetId()) { - case OpCode::Id::ST: - UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended ST is not implemented"); - return instr.generic.type; - case OpCode::Id::STG: - return instr.stg.type; - default: - UNREACHABLE(); - return {}; - } - }(); - - // For unaligned reads we have to read memory too. 
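
The comment above points at the store-side counterpart: a sub-word store has to read the containing 32-bit word, splice the new bits in, and write the whole word back. A minimal sketch of that read-modify-write, mirroring the InsertUnaligned helper at the top of this file (plain C++; the load32/store32 accessors are hypothetical, only to show where the splice sits):

#include <cstdint>

// Splice `size` bits of `value` into `dest` at the byte selected by the low
// address bits; all other bits of the destination word are preserved.
uint32_t InsertUnaligned(uint32_t dest, uint32_t value, uint32_t address, uint32_t mask,
                         uint32_t size) {
    const uint32_t bit_offset = (address & mask) * 8;
    const uint32_t field = ((1u << size) - 1) << bit_offset;   // assumes size < 32
    return (dest & ~field) | ((value << bit_offset) & field);
}

// A 16-bit store then becomes: load the word, insert, store the word back:
//   uint32_t word = load32(addr);                               // hypothetical accessor
//   store32(addr, InsertUnaligned(word, new_value, addr, 0b10, 16));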
- const bool is_read = IsUnaligned(type); - const auto [real_address_base, base_address, descriptor] = - TrackGlobalMemory(bb, instr, is_read, true); - if (!real_address_base || !base_address) { - // Tracking failed, skip the store. - break; - } - - const u32 size = GetMemorySize(type); - const u32 count = Common::AlignUp(size, 32) / 32; - for (u32 i = 0; i < count; ++i) { - const Node it_offset = Immediate(i * 4); - const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); - const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); - Node value = GetRegister(instr.gpr0.Value() + i); - - if (IsUnaligned(type)) { - const u32 mask = GetUnalignedMask(type); - value = InsertUnaligned(gmem, move(value), real_address, mask, size); - } - - bb.push_back(Operation(OperationCode::Assign, gmem, value)); - } - break; - } - case OpCode::Id::RED: { - UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}", - instr.red.type.Value()); - const auto [real_address, base_address, descriptor] = - TrackGlobalMemory(bb, instr, true, true); - if (!real_address || !base_address) { - // Tracking failed, skip atomic. - break; - } - Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); - Node value = GetRegister(instr.gpr0); - bb.push_back(Operation(GetAtomOperation(instr.red.operation), move(gmem), move(value))); - break; - } - case OpCode::Id::ATOM: { - UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc || - instr.atom.operation == AtomicOp::Dec || - instr.atom.operation == AtomicOp::SafeAdd, - "operation={}", instr.atom.operation.Value()); - UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 || - instr.atom.type == GlobalAtomicType::U64 || - instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN || - instr.atom.type == GlobalAtomicType::F32_FTZ_RN, - "type={}", instr.atom.type.Value()); - - const auto [real_address, base_address, descriptor] = - TrackGlobalMemory(bb, instr, true, true); - if (!real_address || !base_address) { - // Tracking failed, skip atomic. - break; - } - - const bool is_signed = - instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64; - Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); - SetRegister(bb, instr.gpr0, - SignedOperation(GetAtomOperation(instr.atom.operation), is_signed, gmem, - GetRegister(instr.gpr20))); - break; - } - case OpCode::Id::ATOMS: { - UNIMPLEMENTED_IF_MSG(instr.atoms.operation == AtomicOp::Inc || - instr.atoms.operation == AtomicOp::Dec, - "operation={}", instr.atoms.operation.Value()); - UNIMPLEMENTED_IF_MSG(instr.atoms.type == AtomicType::S64 || - instr.atoms.type == AtomicType::U64, - "type={}", instr.atoms.type.Value()); - const bool is_signed = - instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; - const s32 offset = instr.atoms.GetImmediateOffset(); - Node address = GetRegister(instr.gpr8); - address = Operation(OperationCode::IAdd, move(address), Immediate(offset)); - SetRegister(bb, instr.gpr0, - SignedOperation(GetAtomOperation(instr.atoms.operation), is_signed, - GetSharedMemory(move(address)), GetRegister(instr.gpr20))); - break; - } - case OpCode::Id::AL2P: { - // Ignore al2p.direction since we don't care about it. - - // Calculate emulation fake physical address. 
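
For context on the RED/ATOM/ATOMS cases decoded above: RED is a reduction whose old value is discarded, ATOM returns the previous memory contents into a register, and ATOMS is the shared-memory variant of the latter. A rough host-side analogy using std::atomic, purely to illustrate that distinction rather than how the IR lowers it:

#include <atomic>
#include <cstdint>

// RED-style reduction: perform the atomic update, ignore the old value.
void red_add(std::atomic<uint32_t>& mem, uint32_t value) {
    mem.fetch_add(value, std::memory_order_relaxed);
}

// ATOM-style atomic: perform the update and hand the previous value back.
uint32_t atom_add(std::atomic<uint32_t>& mem, uint32_t value) {
    return mem.fetch_add(value, std::memory_order_relaxed);
}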
- const Node fixed_address{Immediate(static_cast<u32>(instr.al2p.address))}; - const Node reg{GetRegister(instr.gpr8)}; - const Node fake_address{Operation(OperationCode::IAdd, NO_PRECISE, reg, fixed_address)}; - - // Set the fake address to target register. - SetRegister(bb, instr.gpr0, fake_address); - - // Signal the shader IR to declare all possible attributes and varyings - uses_physical_attributes = true; - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb, - Instruction instr, - bool is_read, bool is_write) { - const auto addr_register{GetRegister(instr.gmem.gpr)}; - const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; - - const auto [base_address, index, offset] = - TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size())); - ASSERT_OR_EXECUTE_MSG( - base_address != nullptr, { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); }, - "Global memory tracking failed"); - - bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset))); - - const GlobalMemoryBase descriptor{index, offset}; - const auto& entry = used_global_memory.try_emplace(descriptor).first; - auto& usage = entry->second; - usage.is_written |= is_write; - usage.is_read |= is_read; - - const auto real_address = - Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register); - - return {real_address, base_address, descriptor}; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp deleted file mode 100644 index 5f88537bc..000000000 --- a/src/video_core/shader/decode/other.cpp +++ /dev/null @@ -1,322 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::ConditionCode; -using Tegra::Shader::Instruction; -using Tegra::Shader::IpaInterpMode; -using Tegra::Shader::OpCode; -using Tegra::Shader::PixelImap; -using Tegra::Shader::Register; -using Tegra::Shader::SystemVariable; - -using Index = Tegra::Shader::Attribute::Index; - -u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - switch (opcode->get().GetId()) { - case OpCode::Id::NOP: { - UNIMPLEMENTED_IF(instr.nop.cc != Tegra::Shader::ConditionCode::T); - UNIMPLEMENTED_IF(instr.nop.trigger != 0); - // With the previous preconditions, this instruction is a no-operation. - break; - } - case OpCode::Id::EXIT: { - const ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "EXIT condition code used: {}", cc); - - switch (instr.flow.cond) { - case Tegra::Shader::FlowCondition::Always: - bb.push_back(Operation(OperationCode::Exit)); - if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) { - // If this is an unconditional exit then just end processing here, - // otherwise we have to account for the possibility of the condition - // not being met, so continue processing the next instruction. 
- pc = MAX_PROGRAM_LENGTH - 1; - } - break; - - case Tegra::Shader::FlowCondition::Fcsm_Tr: - // TODO(bunnei): What is this used for? If we assume this conditon is not - // satisifed, dual vertex shaders in Farming Simulator make more sense - UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr"); - break; - - default: - UNIMPLEMENTED_MSG("Unhandled flow condition: {}", instr.flow.cond.Value()); - } - break; - } - case OpCode::Id::KIL: { - UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always); - - const ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "KIL condition code used: {}", cc); - - bb.push_back(Operation(OperationCode::Discard)); - break; - } - case OpCode::Id::S2R: { - const Node value = [this, instr] { - switch (instr.sys20) { - case SystemVariable::LaneId: - return Operation(OperationCode::ThreadId); - case SystemVariable::InvocationId: - return Operation(OperationCode::InvocationId); - case SystemVariable::Ydirection: - uses_y_negate = true; - return Operation(OperationCode::YNegate); - case SystemVariable::InvocationInfo: - LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete"); - return Immediate(0x00ff'0000U); - case SystemVariable::WscaleFactorXY: - UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented"); - return Immediate(0U); - case SystemVariable::WscaleFactorZ: - UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented"); - return Immediate(0U); - case SystemVariable::Tid: { - Node val = Immediate(0); - val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdX), 0, 9); - val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdY), 16, 9); - val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdZ), 26, 5); - return val; - } - case SystemVariable::TidX: - return Operation(OperationCode::LocalInvocationIdX); - case SystemVariable::TidY: - return Operation(OperationCode::LocalInvocationIdY); - case SystemVariable::TidZ: - return Operation(OperationCode::LocalInvocationIdZ); - case SystemVariable::CtaIdX: - return Operation(OperationCode::WorkGroupIdX); - case SystemVariable::CtaIdY: - return Operation(OperationCode::WorkGroupIdY); - case SystemVariable::CtaIdZ: - return Operation(OperationCode::WorkGroupIdZ); - case SystemVariable::EqMask: - case SystemVariable::LtMask: - case SystemVariable::LeMask: - case SystemVariable::GtMask: - case SystemVariable::GeMask: - uses_warps = true; - switch (instr.sys20) { - case SystemVariable::EqMask: - return Operation(OperationCode::ThreadEqMask); - case SystemVariable::LtMask: - return Operation(OperationCode::ThreadLtMask); - case SystemVariable::LeMask: - return Operation(OperationCode::ThreadLeMask); - case SystemVariable::GtMask: - return Operation(OperationCode::ThreadGtMask); - case SystemVariable::GeMask: - return Operation(OperationCode::ThreadGeMask); - default: - UNREACHABLE(); - return Immediate(0u); - } - default: - UNIMPLEMENTED_MSG("Unhandled system move: {}", instr.sys20.Value()); - return Immediate(0u); - } - }(); - SetRegister(bb, instr.gpr0, value); - - break; - } - case OpCode::Id::BRA: { - Node branch; - if (instr.bra.constant_buffer == 0) { - const u32 target = pc + instr.bra.GetBranchTarget(); - branch = Operation(OperationCode::Branch, Immediate(target)); - } else { - const u32 target = pc + 1; - const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset()); - const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, - PRECISE, op_a, 
Immediate(3)); - const Node operand = - Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); - branch = Operation(OperationCode::BranchIndirect, operand); - } - - const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; - if (cc != Tegra::Shader::ConditionCode::T) { - bb.push_back(Conditional(GetConditionCode(cc), {branch})); - } else { - bb.push_back(branch); - } - break; - } - case OpCode::Id::BRX: { - Node operand; - if (instr.brx.constant_buffer != 0) { - const s32 target = pc + 1; - const Node index = GetRegister(instr.gpr8); - const Node op_a = - GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); - const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, - PRECISE, op_a, Immediate(3)); - operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); - } else { - const s32 target = pc + instr.brx.GetBranchExtend(); - const Node op_a = GetRegister(instr.gpr8); - const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, - PRECISE, op_a, Immediate(3)); - operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); - } - const Node branch = Operation(OperationCode::BranchIndirect, operand); - - const ConditionCode cc = instr.flow_condition_code; - if (cc != ConditionCode::T) { - bb.push_back(Conditional(GetConditionCode(cc), {branch})); - } else { - bb.push_back(branch); - } - break; - } - case OpCode::Id::SSY: { - UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, - "Constant buffer flow is not supported"); - - if (disable_flow_stack) { - break; - } - - // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC. - const u32 target = pc + instr.bra.GetBranchTarget(); - bb.push_back( - Operation(OperationCode::PushFlowStack, MetaStackClass::Ssy, Immediate(target))); - break; - } - case OpCode::Id::PBK: { - UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, - "Constant buffer PBK is not supported"); - - if (disable_flow_stack) { - break; - } - - // PBK pushes to a stack the address where BRK will jump to. - const u32 target = pc + instr.bra.GetBranchTarget(); - bb.push_back( - Operation(OperationCode::PushFlowStack, MetaStackClass::Pbk, Immediate(target))); - break; - } - case OpCode::Id::SYNC: { - const ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "SYNC condition code used: {}", cc); - - if (decompiled) { - break; - } - - // The SYNC opcode jumps to the address previously set by the SSY opcode - bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy)); - break; - } - case OpCode::Id::BRK: { - const ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "BRK condition code used: {}", cc); - if (decompiled) { - break; - } - - // The BRK opcode jumps to the address previously set by the PBK opcode - bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk)); - break; - } - case OpCode::Id::IPA: { - const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff; - const auto attribute = instr.attribute.fmt28; - const Index index = attribute.index; - - Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8) - : GetInputAttribute(index, attribute.element); - - // Code taken from Ryujinx. 
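
The SSY/SYNC and PBK/BRK cases above model the hardware's two flow stacks: SSY and PBK push a target address, SYNC and BRK pop it and jump there. A minimal sketch of that stack discipline (illustrative only; the IR itself just emits PushFlowStack/PopFlowStack operations for the backends to lower):

#include <cstdint>
#include <stack>

struct FlowStacks {
    std::stack<uint32_t> ssy;  // reconvergence targets: pushed by SSY, popped by SYNC
    std::stack<uint32_t> pbk;  // break targets: pushed by PBK, popped by BRK
};

// SSY/PBK: remember where execution should resume later.
void Push(std::stack<uint32_t>& stack, uint32_t target_pc) {
    stack.push(target_pc);
}

// SYNC/BRK: jump to the most recently pushed address.
uint32_t Pop(std::stack<uint32_t>& stack) {
    const uint32_t target_pc = stack.top();
    stack.pop();
    return target_pc;
}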
- if (index >= Index::Attribute_0 && index <= Index::Attribute_31) { - const u32 location = static_cast<u32>(index) - static_cast<u32>(Index::Attribute_0); - if (header.ps.GetPixelImap(location) == PixelImap::Perspective) { - Node position_w = GetInputAttribute(Index::Position, 3); - value = Operation(OperationCode::FMul, move(value), move(position_w)); - } - } - - if (instr.ipa.interp_mode == IpaInterpMode::Multiply) { - value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20)); - } - - value = GetSaturatedFloat(move(value), instr.ipa.saturate); - - SetRegister(bb, instr.gpr0, move(value)); - break; - } - case OpCode::Id::OUT_R: { - UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex, - "Stream buffer is not supported"); - - if (instr.out.emit) { - // gpr0 is used to store the next address and gpr8 contains the address to emit. - // Hardware uses pointers here but we just ignore it - bb.push_back(Operation(OperationCode::EmitVertex)); - SetRegister(bb, instr.gpr0, Immediate(0)); - } - if (instr.out.cut) { - bb.push_back(Operation(OperationCode::EndPrimitive)); - } - break; - } - case OpCode::Id::ISBERD: { - UNIMPLEMENTED_IF(instr.isberd.o != 0); - UNIMPLEMENTED_IF(instr.isberd.skew != 0); - UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None); - UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None); - LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete"); - SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8)); - break; - } - case OpCode::Id::BAR: { - UNIMPLEMENTED_IF_MSG(instr.value != 0xF0A81B8000070000ULL, "BAR is not BAR.SYNC 0x0"); - bb.push_back(Operation(OperationCode::Barrier)); - break; - } - case OpCode::Id::MEMBAR: { - UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default); - const OperationCode type = [instr] { - switch (instr.membar.type) { - case Tegra::Shader::MembarType::CTA: - return OperationCode::MemoryBarrierGroup; - case Tegra::Shader::MembarType::GL: - return OperationCode::MemoryBarrierGlobal; - default: - UNIMPLEMENTED_MSG("MEMBAR type={}", instr.membar.type.Value()); - return OperationCode::MemoryBarrierGlobal; - } - }(); - bb.push_back(Operation(type)); - break; - } - case OpCode::Id::DEPBAR: { - LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed"); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp deleted file mode 100644 index 9290d22eb..000000000 --- a/src/video_core/shader/decode/predicate_set_predicate.cpp +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; - -u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - switch (opcode->get().GetId()) { - case OpCode::Id::PSETP: { - const Node op_a = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); - const Node op_b = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0); - - // We can't use the constant predicate as destination. - ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); - - const Node second_pred = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); - - const OperationCode combiner = GetPredicateCombiner(instr.psetp.op); - const Node predicate = Operation(combiner, op_a, op_b); - - // Set the primary predicate to the result of Predicate OP SecondPredicate - SetPredicate(bb, instr.psetp.pred3, Operation(combiner, predicate, second_pred)); - - if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if - // enabled - SetPredicate(bb, instr.psetp.pred0, - Operation(combiner, Operation(OperationCode::LogicalNegate, predicate), - second_pred)); - } - break; - } - case OpCode::Id::CSETP: { - const Node pred = GetPredicate(instr.csetp.pred39, instr.csetp.neg_pred39 != 0); - const Node condition_code = GetConditionCode(instr.csetp.cc); - - const OperationCode combiner = GetPredicateCombiner(instr.csetp.op); - - if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) { - SetPredicate(bb, instr.csetp.pred3, Operation(combiner, condition_code, pred)); - } - if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { - const Node neg_cc = Operation(OperationCode::LogicalNegate, condition_code); - SetPredicate(bb, instr.csetp.pred0, Operation(combiner, neg_cc, pred)); - } - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp deleted file mode 100644 index 84dbc50fe..000000000 --- a/src/video_core/shader/decode/predicate_set_register.cpp +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in PSET is not implemented"); - - const Node op_a = GetPredicate(instr.pset.pred12, instr.pset.neg_pred12 != 0); - const Node op_b = GetPredicate(instr.pset.pred29, instr.pset.neg_pred29 != 0); - const Node first_pred = Operation(GetPredicateCombiner(instr.pset.cond), op_a, op_b); - - const Node second_pred = GetPredicate(instr.pset.pred39, instr.pset.neg_pred39 != 0); - - const OperationCode combiner = GetPredicateCombiner(instr.pset.op); - const Node predicate = Operation(combiner, first_pred, second_pred); - - const Node true_value = instr.pset.bf ? Immediate(1.0f) : Immediate(0xffffffff); - const Node false_value = instr.pset.bf ? Immediate(0.0f) : Immediate(0); - const Node value = - Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); - - if (instr.pset.bf) { - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - } else { - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - } - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp deleted file mode 100644 index 6116c31aa..000000000 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <utility> - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -namespace { -constexpr u64 NUM_CONDITION_CODES = 4; -constexpr u64 NUM_PREDICATES = 7; -} // namespace - -u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node apply_mask = [this, opcode, instr] { - switch (opcode->get().GetId()) { - case OpCode::Id::R2P_IMM: - case OpCode::Id::P2R_IMM: - return Immediate(static_cast<u32>(instr.p2r_r2p.immediate_mask)); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - const u32 offset = static_cast<u32>(instr.p2r_r2p.byte) * 8; - - const bool cc = instr.p2r_r2p.mode == Tegra::Shader::R2pMode::Cc; - const u64 num_entries = cc ? NUM_CONDITION_CODES : NUM_PREDICATES; - const auto get_entry = [this, cc](u64 entry) { - return cc ? 
GetInternalFlag(static_cast<InternalFlag>(entry)) : GetPredicate(entry); - }; - - switch (opcode->get().GetId()) { - case OpCode::Id::R2P_IMM: { - Node mask = GetRegister(instr.gpr8); - - for (u64 entry = 0; entry < num_entries; ++entry) { - const u32 shift = static_cast<u32>(entry); - - Node apply = BitfieldExtract(apply_mask, shift, 1); - Node condition = Operation(OperationCode::LogicalUNotEqual, apply, Immediate(0)); - - Node compare = BitfieldExtract(mask, offset + shift, 1); - Node value = Operation(OperationCode::LogicalUNotEqual, move(compare), Immediate(0)); - - Node code = Operation(OperationCode::LogicalAssign, get_entry(entry), move(value)); - bb.push_back(Conditional(condition, {move(code)})); - } - break; - } - case OpCode::Id::P2R_IMM: { - Node value = Immediate(0); - for (u64 entry = 0; entry < num_entries; ++entry) { - Node bit = Operation(OperationCode::Select, get_entry(entry), Immediate(1U << entry), - Immediate(0)); - value = Operation(OperationCode::UBitwiseOr, move(value), move(bit)); - } - value = Operation(OperationCode::UBitwiseAnd, move(value), apply_mask); - value = BitfieldInsert(GetRegister(instr.gpr8), move(value), offset, 8); - - SetRegister(bb, instr.gpr0, move(value)); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled P2R/R2R instruction: {}", opcode->get().GetName()); - break; - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp deleted file mode 100644 index a53819c15..000000000 --- a/src/video_core/shader/decode/shift.cpp +++ /dev/null @@ -1,153 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::ShfType; -using Tegra::Shader::ShfXmode; - -namespace { - -Node IsFull(Node shift) { - return Operation(OperationCode::LogicalIEqual, move(shift), Immediate(32)); -} - -Node Shift(OperationCode opcode, Node value, Node shift) { - Node shifted = Operation(opcode, move(value), shift); - return Operation(OperationCode::Select, IsFull(move(shift)), Immediate(0), move(shifted)); -} - -Node ClampShift(Node shift, s32 size = 32) { - shift = Operation(OperationCode::IMax, move(shift), Immediate(0)); - return Operation(OperationCode::IMin, move(shift), Immediate(size)); -} - -Node WrapShift(Node shift, s32 size = 32) { - return Operation(OperationCode::UBitwiseAnd, move(shift), Immediate(size - 1)); -} - -Node ShiftRight(Node low, Node high, Node shift, Node low_shift, ShfType type) { - // These values are used when the shift value is less than 32 - Node less_low = Shift(OperationCode::ILogicalShiftRight, low, shift); - Node less_high = Shift(OperationCode::ILogicalShiftLeft, high, low_shift); - Node less = Operation(OperationCode::IBitwiseOr, move(less_high), move(less_low)); - - if (type == ShfType::Bits32) { - // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits - return Operation(OperationCode::Select, IsFull(move(shift)), move(high), move(less)); - } - - // And these when it's larger than or 32 - const bool is_signed = type == ShfType::S64; - const auto opcode = 
SignedToUnsignedCode(OperationCode::IArithmeticShiftRight, is_signed); - Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32)); - Node greater = Shift(opcode, high, move(reduced)); - - Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32)); - Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0)); - - Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater)); - return Operation(OperationCode::Select, move(is_zero), move(high), move(value)); -} - -Node ShiftLeft(Node low, Node high, Node shift, Node low_shift, ShfType type) { - // These values are used when the shift value is less than 32 - Node less_low = Operation(OperationCode::ILogicalShiftRight, low, low_shift); - Node less_high = Operation(OperationCode::ILogicalShiftLeft, high, shift); - Node less = Operation(OperationCode::IBitwiseOr, move(less_low), move(less_high)); - - if (type == ShfType::Bits32) { - // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits - return Operation(OperationCode::Select, IsFull(move(shift)), move(low), move(less)); - } - - // And these when it's larger than or 32 - Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32)); - Node greater = Shift(OperationCode::ILogicalShiftLeft, move(low), move(reduced)); - - Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32)); - Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0)); - - Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater)); - return Operation(OperationCode::Select, move(is_zero), move(high), move(value)); -} - -} // Anonymous namespace - -u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = [this, instr] { - if (instr.is_b_imm) { - return Immediate(instr.alu.GetSignedImm20_20()); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - switch (const auto opid = opcode->get().GetId(); opid) { - case OpCode::Id::SHR_C: - case OpCode::Id::SHR_R: - case OpCode::Id::SHR_IMM: { - op_b = instr.shr.wrap ? WrapShift(move(op_b)) : ClampShift(move(op_b)); - - Node value = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed, - move(op_a), move(op_b)); - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, move(value)); - break; - } - case OpCode::Id::SHL_C: - case OpCode::Id::SHL_R: - case OpCode::Id::SHL_IMM: { - Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b); - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, move(value)); - break; - } - case OpCode::Id::SHF_RIGHT_R: - case OpCode::Id::SHF_RIGHT_IMM: - case OpCode::Id::SHF_LEFT_R: - case OpCode::Id::SHF_LEFT_IMM: { - UNIMPLEMENTED_IF(instr.generates_cc); - UNIMPLEMENTED_IF_MSG(instr.shf.xmode != ShfXmode::None, "xmode={}", - instr.shf.xmode.Value()); - - if (instr.is_b_imm) { - op_b = Immediate(static_cast<u32>(instr.shf.immediate)); - } - const s32 size = instr.shf.type == ShfType::Bits32 ? 32 : 64; - Node shift = instr.shf.wrap ? 
WrapShift(move(op_b), size) : ClampShift(move(op_b), size); - - Node negated_shift = Operation(OperationCode::INegate, shift); - Node low_shift = Operation(OperationCode::IAdd, move(negated_shift), Immediate(32)); - - const bool is_right = opid == OpCode::Id::SHF_RIGHT_R || opid == OpCode::Id::SHF_RIGHT_IMM; - Node value = (is_right ? ShiftRight : ShiftLeft)( - move(op_a), GetRegister(instr.gpr39), move(shift), move(low_shift), instr.shf.type); - - SetRegister(bb, instr.gpr0, move(value)); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp deleted file mode 100644 index c69681e8d..000000000 --- a/src/video_core/shader/decode/texture.cpp +++ /dev/null @@ -1,935 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <vector> -#include <fmt/format.h> - -#include "common/assert.h" -#include "common/bit_field.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Register; -using Tegra::Shader::TextureMiscMode; -using Tegra::Shader::TextureProcessMode; -using Tegra::Shader::TextureType; - -static std::size_t GetCoordCount(TextureType texture_type) { - switch (texture_type) { - case TextureType::Texture1D: - return 1; - case TextureType::Texture2D: - return 2; - case TextureType::Texture3D: - case TextureType::TextureCube: - return 3; - default: - UNIMPLEMENTED_MSG("Unhandled texture type: {}", texture_type); - return 0; - } -} - -u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - bool is_bindless = false; - switch (opcode->get().GetId()) { - case OpCode::Id::TEX: { - const TextureType texture_type{instr.tex.texture_type}; - const bool is_array = instr.tex.array != 0; - const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); - const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); - const auto process_mode = instr.tex.GetTextureProcessMode(); - WriteTexInstructionFloat( - bb, instr, - GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi, {})); - break; - } - case OpCode::Id::TEX_B: { - UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); - - const TextureType texture_type{instr.tex_b.texture_type}; - const bool is_array = instr.tex_b.array != 0; - const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); - const bool depth_compare = instr.tex_b.UsesMiscMode(TextureMiscMode::DC); - const auto process_mode = instr.tex_b.GetTextureProcessMode(); - WriteTexInstructionFloat(bb, instr, - GetTexCode(instr, texture_type, process_mode, depth_compare, - is_array, is_aoffi, {instr.gpr20})); - break; - } - case OpCode::Id::TEXS: { - const TextureType texture_type{instr.texs.GetTextureType()}; - const bool is_array{instr.texs.IsArrayTexture()}; - const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC); - const auto process_mode = 
instr.texs.GetTextureProcessMode(); - - const Node4 components = - GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array); - - if (instr.texs.fp32_flag) { - WriteTexsInstructionFloat(bb, instr, components); - } else { - WriteTexsInstructionHalfFloat(bb, instr, components); - } - break; - } - case OpCode::Id::TLD4_B: { - is_bindless = true; - [[fallthrough]]; - } - case OpCode::Id::TLD4: { - UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), - "NDV is not implemented"); - const auto texture_type = instr.tld4.texture_type.Value(); - const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC) - : instr.tld4.UsesMiscMode(TextureMiscMode::DC); - const bool is_array = instr.tld4.array != 0; - const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI) - : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI); - const bool is_ptp = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::PTP) - : instr.tld4.UsesMiscMode(TextureMiscMode::PTP); - WriteTexInstructionFloat(bb, instr, - GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, - is_ptp, is_bindless)); - break; - } - case OpCode::Id::TLD4S: { - constexpr std::size_t num_coords = 2; - const bool is_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI); - const bool is_depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC); - const Node op_a = GetRegister(instr.gpr8); - const Node op_b = GetRegister(instr.gpr20); - - // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. - std::vector<Node> coords; - std::vector<Node> aoffi; - Node depth_compare; - if (is_depth_compare) { - // Note: TLD4S coordinate encoding works just like TEXS's - const Node op_y = GetRegister(instr.gpr8.Value() + 1); - coords.push_back(op_a); - coords.push_back(op_y); - if (is_aoffi) { - aoffi = GetAoffiCoordinates(op_b, num_coords, true); - depth_compare = GetRegister(instr.gpr20.Value() + 1); - } else { - depth_compare = op_b; - } - } else { - // There's no depth compare - coords.push_back(op_a); - if (is_aoffi) { - coords.push_back(GetRegister(instr.gpr8.Value() + 1)); - aoffi = GetAoffiCoordinates(op_b, num_coords, true); - } else { - coords.push_back(op_b); - } - } - const Node component = Immediate(static_cast<u32>(instr.tld4s.component)); - - SamplerInfo info; - info.is_shadow = is_depth_compare; - const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info); - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - MetaTexture meta{*sampler, {}, depth_compare, aoffi, {}, {}, - {}, {}, component, element, {}}; - values[element] = Operation(OperationCode::TextureGather, meta, coords); - } - - if (instr.tld4s.fp16_flag) { - WriteTexsInstructionHalfFloat(bb, instr, values, true); - } else { - WriteTexsInstructionFloat(bb, instr, values, true); - } - break; - } - case OpCode::Id::TXD_B: - is_bindless = true; - [[fallthrough]]; - case OpCode::Id::TXD: { - UNIMPLEMENTED_IF_MSG(instr.txd.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); - - const bool is_array = instr.txd.is_array != 0; - const auto derivate_reg = instr.gpr20.Value(); - const auto texture_type = instr.txd.texture_type.Value(); - const auto coord_count = GetCoordCount(texture_type); - u64 base_reg = instr.gpr8.Value(); - Node index_var; - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - const std::optional<SamplerEntry> sampler = - is_bindless ? 
GetBindlessSampler(base_reg, info, index_var) - : GetSampler(instr.sampler, info); - Node4 values; - if (!sampler) { - std::generate(values.begin(), values.end(), [this] { return Immediate(0); }); - WriteTexInstructionFloat(bb, instr, values); - break; - } - - if (is_bindless) { - base_reg++; - } - - std::vector<Node> coords; - std::vector<Node> derivates; - for (std::size_t i = 0; i < coord_count; ++i) { - coords.push_back(GetRegister(base_reg + i)); - const std::size_t derivate = i * 2; - derivates.push_back(GetRegister(derivate_reg + derivate)); - derivates.push_back(GetRegister(derivate_reg + derivate + 1)); - } - - Node array_node = {}; - if (is_array) { - const Node info_reg = GetRegister(base_reg + coord_count); - array_node = BitfieldExtract(info_reg, 0, 16); - } - - for (u32 element = 0; element < values.size(); ++element) { - MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, - {}, {}, {}, element, index_var}; - values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords); - } - - WriteTexInstructionFloat(bb, instr, values); - - break; - } - case OpCode::Id::TXQ_B: - is_bindless = true; - [[fallthrough]]; - case OpCode::Id::TXQ: { - Node index_var; - const std::optional<SamplerEntry> sampler = - is_bindless ? GetBindlessSampler(instr.gpr8, {}, index_var) - : GetSampler(instr.sampler, {}); - - if (!sampler) { - u32 indexer = 0; - for (u32 element = 0; element < 4; ++element) { - if (!instr.txq.IsComponentEnabled(element)) { - continue; - } - const Node value = Immediate(0); - SetTemporary(bb, indexer++, value); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - - u32 indexer = 0; - switch (instr.txq.query_type) { - case Tegra::Shader::TextureQueryType::Dimension: { - for (u32 element = 0; element < 4; ++element) { - if (!instr.txq.IsComponentEnabled(element)) { - continue; - } - MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; - const Node value = - Operation(OperationCode::TextureQueryDimensions, meta, - GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); - SetTemporary(bb, indexer++, value); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled texture query type: {}", instr.txq.query_type.Value()); - } - break; - } - case OpCode::Id::TMML_B: - is_bindless = true; - [[fallthrough]]; - case OpCode::Id::TMML: { - UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), - "NDV is not implemented"); - - const auto texture_type = instr.tmml.texture_type.Value(); - const bool is_array = instr.tmml.array != 0; - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - Node index_var; - const std::optional<SamplerEntry> sampler = - is_bindless ? GetBindlessSampler(instr.gpr20, info, index_var) - : GetSampler(instr.sampler, info); - - if (!sampler) { - u32 indexer = 0; - for (u32 element = 0; element < 2; ++element) { - if (!instr.tmml.IsComponentEnabled(element)) { - continue; - } - const Node value = Immediate(0); - SetTemporary(bb, indexer++, value); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - - const u64 base_index = is_array ? 
1 : 0; - const u64 num_components = [texture_type] { - switch (texture_type) { - case TextureType::Texture1D: - return 1; - case TextureType::Texture2D: - return 2; - case TextureType::TextureCube: - return 3; - default: - UNIMPLEMENTED_MSG("Unhandled texture type {}", texture_type); - return 2; - } - }(); - // TODO: What's the array component used for? - - std::vector<Node> coords; - coords.reserve(num_components); - for (u64 component = 0; component < num_components; ++component) { - coords.push_back(GetRegister(instr.gpr8.Value() + base_index + component)); - } - - u32 indexer = 0; - for (u32 element = 0; element < 2; ++element) { - if (!instr.tmml.IsComponentEnabled(element)) { - continue; - } - MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; - Node value = Operation(OperationCode::TextureQueryLod, meta, coords); - SetTemporary(bb, indexer++, std::move(value)); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - case OpCode::Id::TLD: { - UNIMPLEMENTED_IF_MSG(instr.tld.aoffi, "AOFFI is not implemented"); - UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented"); - UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented"); - - WriteTexInstructionFloat(bb, instr, GetTldCode(instr)); - break; - } - case OpCode::Id::TLDS: { - const TextureType texture_type{instr.tlds.GetTextureType()}; - const bool is_array{instr.tlds.IsArrayTexture()}; - - UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); - UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented"); - - const Node4 components = GetTldsCode(instr, texture_type, is_array); - - if (instr.tlds.fp32_flag) { - WriteTexsInstructionFloat(bb, instr, components); - } else { - WriteTexsInstructionHalfFloat(bb, instr, components); - } - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo( - SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) { - if (info.IsComplete()) { - return info; - } - if (!sampler) { - LOG_WARNING(HW_GPU, "Unknown sampler info"); - info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D); - info.is_array = info.is_array.value_or(false); - info.is_shadow = info.is_shadow.value_or(false); - info.is_buffer = info.is_buffer.value_or(false); - return info; - } - info.type = info.type.value_or(sampler->texture_type); - info.is_array = info.is_array.value_or(sampler->is_array != 0); - info.is_shadow = info.is_shadow.value_or(sampler->is_shadow != 0); - info.is_buffer = info.is_buffer.value_or(sampler->is_buffer != 0); - return info; -} - -std::optional<SamplerEntry> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, - SamplerInfo sampler_info) { - const u32 offset = static_cast<u32>(sampler.index.Value()); - const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset)); - - // If this sampler has already been used, return the existing mapping. 
- const auto it = - std::find_if(used_samplers.begin(), used_samplers.end(), - [offset](const SamplerEntry& entry) { return entry.offset == offset; }); - if (it != used_samplers.end()) { - ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && - it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); - return *it; - } - - // Otherwise create a new mapping for this sampler - const auto next_index = static_cast<u32>(used_samplers.size()); - return used_samplers.emplace_back(next_index, offset, *info.type, *info.is_array, - *info.is_shadow, *info.is_buffer, false); -} - -std::optional<SamplerEntry> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, - SamplerInfo info, Node& index_var) { - const Node sampler_register = GetRegister(reg); - const auto [base_node, tracked_sampler_info] = - TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size())); - if (!base_node) { - UNREACHABLE(); - return std::nullopt; - } - - if (const auto sampler_info = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) { - const u32 buffer = sampler_info->index; - const u32 offset = sampler_info->offset; - info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset)); - - // If this sampler has already been used, return the existing mapping. - const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), - [buffer, offset](const SamplerEntry& entry) { - return entry.buffer == buffer && entry.offset == offset; - }); - if (it != used_samplers.end()) { - ASSERT(it->is_bindless && it->type == info.type && it->is_array == info.is_array && - it->is_shadow == info.is_shadow); - return *it; - } - - // Otherwise create a new mapping for this sampler - const auto next_index = static_cast<u32>(used_samplers.size()); - return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array, - *info.is_shadow, *info.is_buffer, false); - } - if (const auto sampler_info = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) { - const std::pair indices = sampler_info->indices; - const std::pair offsets = sampler_info->offsets; - info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets)); - - // Try to use an already created sampler if it exists - const auto it = - std::find_if(used_samplers.begin(), used_samplers.end(), - [indices, offsets](const SamplerEntry& entry) { - return offsets == std::pair{entry.offset, entry.secondary_offset} && - indices == std::pair{entry.buffer, entry.secondary_buffer}; - }); - if (it != used_samplers.end()) { - ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array && - it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); - return *it; - } - - // Otherwise create a new mapping for this sampler - const u32 next_index = static_cast<u32>(used_samplers.size()); - return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array, - *info.is_shadow, *info.is_buffer); - } - if (const auto sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) { - const u32 base_offset = sampler_info->base_offset / 4; - index_var = GetCustomVariable(sampler_info->bindless_var); - info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset)); - - // If this sampler has already been used, return the existing mapping. 
- const auto it = std::find_if( - used_samplers.begin(), used_samplers.end(), - [base_offset](const SamplerEntry& entry) { return entry.offset == base_offset; }); - if (it != used_samplers.end()) { - ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && - it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer && - it->is_indexed); - return *it; - } - - uses_indexed_samplers = true; - // Otherwise create a new mapping for this sampler - const auto next_index = static_cast<u32>(used_samplers.size()); - return used_samplers.emplace_back(next_index, base_offset, *info.type, *info.is_array, - *info.is_shadow, *info.is_buffer, true); - } - return std::nullopt; -} - -void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { - u32 dest_elem = 0; - for (u32 elem = 0; elem < 4; ++elem) { - if (!instr.tex.IsComponentEnabled(elem)) { - // Skip disabled components - continue; - } - SetTemporary(bb, dest_elem++, components[elem]); - } - // After writing values in temporals, move them to the real registers - for (u32 i = 0; i < dest_elem; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } -} - -void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components, - bool ignore_mask) { - // TEXS has two destination registers and a swizzle. The first two elements in the swizzle - // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 - - u32 dest_elem = 0; - for (u32 component = 0; component < 4; ++component) { - if (!instr.texs.IsComponentEnabled(component) && !ignore_mask) - continue; - SetTemporary(bb, dest_elem++, components[component]); - } - - for (u32 i = 0; i < dest_elem; ++i) { - if (i < 2) { - // Write the first two swizzle components to gpr0 and gpr0+1 - SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporary(i)); - } else { - ASSERT(instr.texs.HasTwoDestinations()); - // Write the rest of the swizzle components to gpr28 and gpr28+1 - SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporary(i)); - } - } -} - -void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, - const Node4& components, bool ignore_mask) { - // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half - // float instruction). 
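
As the comment above says, TEXS.F16 results come back as pairs of half floats packed into 32-bit registers, which the code below expresses with HPack2 operations. A tiny sketch of the packing itself, assuming the components have already been converted to IEEE half-precision bit patterns and assuming low-component-first ordering (both assumptions, not taken from this file):

#include <cstdint>

// Pack two 16-bit half-float bit patterns into one 32-bit register value:
// the first component in the low half, the second in the high half.
uint32_t HPack2(uint16_t first_half, uint16_t second_half) {
    return static_cast<uint32_t>(first_half) | (static_cast<uint32_t>(second_half) << 16);
}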
- - Node4 values; - u32 dest_elem = 0; - for (u32 component = 0; component < 4; ++component) { - if (!instr.texs.IsComponentEnabled(component) && !ignore_mask) - continue; - values[dest_elem++] = components[component]; - } - if (dest_elem == 0) - return; - - std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); }); - - const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]); - if (dest_elem <= 2) { - SetRegister(bb, instr.gpr0, first_value); - return; - } - - SetTemporary(bb, 0, first_value); - SetTemporary(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); - - SetRegister(bb, instr.gpr0, GetTemporary(0)); - SetRegister(bb, instr.gpr28, GetTemporary(1)); -} - -Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, std::vector<Node> coords, - Node array, Node depth_compare, u32 bias_offset, - std::vector<Node> aoffi, - std::optional<Tegra::Shader::Register> bindless_reg) { - const bool is_array = array != nullptr; - const bool is_shadow = depth_compare != nullptr; - const bool is_bindless = bindless_reg.has_value(); - - ASSERT_MSG(texture_type != TextureType::Texture3D || !is_array || !is_shadow, - "Illegal texture type"); - - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - info.is_shadow = is_shadow; - info.is_buffer = false; - - Node index_var; - const std::optional<SamplerEntry> sampler = - is_bindless ? GetBindlessSampler(*bindless_reg, info, index_var) - : GetSampler(instr.sampler, info); - if (!sampler) { - return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)}; - } - - const bool lod_needed = process_mode == TextureProcessMode::LZ || - process_mode == TextureProcessMode::LL || - process_mode == TextureProcessMode::LLA; - const OperationCode opcode = lod_needed ? OperationCode::TextureLod : OperationCode::Texture; - - Node bias; - Node lod; - switch (process_mode) { - case TextureProcessMode::None: - break; - case TextureProcessMode::LZ: - lod = Immediate(0.0f); - break; - case TextureProcessMode::LB: - // If present, lod or bias are always stored in the register indexed by the gpr20 field with - // an offset depending on the usage of the other registers. - bias = GetRegister(instr.gpr20.Value() + bias_offset); - break; - case TextureProcessMode::LL: - lod = GetRegister(instr.gpr20.Value() + bias_offset); - break; - default: - UNIMPLEMENTED_MSG("Unimplemented process mode={}", process_mode); - break; - } - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, - lod, {}, element, index_var}; - values[element] = Operation(opcode, meta, coords); - } - - return values; -} - -Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, bool depth_compare, bool is_array, - bool is_aoffi, std::optional<Tegra::Shader::Register> bindless_reg) { - const bool lod_bias_enabled{ - (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)}; - - const bool is_bindless = bindless_reg.has_value(); - - u64 parameter_register = instr.gpr20.Value(); - if (is_bindless) { - ++parameter_register; - } - - const u32 bias_lod_offset = (is_bindless ? 
1 : 0); - if (lod_bias_enabled) { - ++parameter_register; - } - - const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array, - lod_bias_enabled, 4, 5); - const auto coord_count = std::get<0>(coord_counts); - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used - const u64 coord_register = array_register + (is_array ? 1 : 0); - - std::vector<Node> coords; - for (std::size_t i = 0; i < coord_count; ++i) { - coords.push_back(GetRegister(coord_register + i)); - } - // 1D.DC in OpenGL the 2nd component is ignored. - if (depth_compare && !is_array && texture_type == TextureType::Texture1D) { - coords.push_back(Immediate(0.0f)); - } - - const Node array = is_array ? GetRegister(array_register) : nullptr; - - std::vector<Node> aoffi; - if (is_aoffi) { - aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false); - } - - Node dc; - if (depth_compare) { - // Depth is always stored in the register signaled by gpr20 or in the next register if lod - // or bias are used - dc = GetRegister(parameter_register++); - } - - return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_lod_offset, - aoffi, bindless_reg); -} - -Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, bool depth_compare, bool is_array) { - const bool lod_bias_enabled = - (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); - - const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array, - lod_bias_enabled, 4, 4); - const auto coord_count = std::get<0>(coord_counts); - - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used - const u64 coord_register = array_register + (is_array ? 1 : 0); - const u64 last_coord_register = - (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) - ? static_cast<u64>(instr.gpr20.Value()) - : coord_register + 1; - const u32 bias_offset = coord_count > 2 ? 1 : 0; - - std::vector<Node> coords; - for (std::size_t i = 0; i < coord_count; ++i) { - const bool last = (i == (coord_count - 1)) && (coord_count > 1); - coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); - } - - const Node array = is_array ? GetRegister(array_register) : nullptr; - - Node dc; - if (depth_compare) { - // Depth is always stored in the register signaled by gpr20 or in the next register if lod - // or bias are used - const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); - dc = GetRegister(depth_register); - } - - return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {}, - {}); -} - -Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, - bool is_array, bool is_aoffi, bool is_ptp, bool is_bindless) { - ASSERT_MSG(!(is_aoffi && is_ptp), "AOFFI and PTP can't be enabled at the same time"); - - const std::size_t coord_count = GetCoordCount(texture_type); - - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used - const u64 coord_register = array_register + (is_array ? 
1 : 0); - - std::vector<Node> coords; - for (std::size_t i = 0; i < coord_count; ++i) { - coords.push_back(GetRegister(coord_register + i)); - } - - u64 parameter_register = instr.gpr20.Value(); - - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - info.is_shadow = depth_compare; - - Node index_var; - const std::optional<SamplerEntry> sampler = - is_bindless ? GetBindlessSampler(parameter_register++, info, index_var) - : GetSampler(instr.sampler, info); - Node4 values; - if (!sampler) { - for (u32 element = 0; element < values.size(); ++element) { - values[element] = Immediate(0); - } - return values; - } - - std::vector<Node> aoffi, ptp; - if (is_aoffi) { - aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true); - } else if (is_ptp) { - ptp = GetPtpCoordinates( - {GetRegister(parameter_register++), GetRegister(parameter_register++)}); - } - - Node dc; - if (depth_compare) { - dc = GetRegister(parameter_register++); - } - - const Node component = is_bindless ? Immediate(static_cast<u32>(instr.tld4_b.component)) - : Immediate(static_cast<u32>(instr.tld4.component)); - - for (u32 element = 0; element < values.size(); ++element) { - auto coords_copy = coords; - MetaTexture meta{ - *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element, - index_var}; - values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); - } - - return values; -} - -Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { - const auto texture_type{instr.tld.texture_type}; - const bool is_array{instr.tld.is_array != 0}; - const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL}; - const std::size_t coord_count{GetCoordCount(texture_type)}; - - u64 gpr8_cursor{instr.gpr8.Value()}; - const Node array_register{is_array ? GetRegister(gpr8_cursor++) : nullptr}; - - std::vector<Node> coords; - coords.reserve(coord_count); - for (std::size_t i = 0; i < coord_count; ++i) { - coords.push_back(GetRegister(gpr8_cursor++)); - } - - u64 gpr20_cursor{instr.gpr20.Value()}; - // const Node bindless_register{is_bindless ? GetRegister(gpr20_cursor++) : nullptr}; - const Node lod{lod_enabled ? GetRegister(gpr20_cursor++) : Immediate(0u)}; - // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; - // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; - - const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, {}); - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - auto coords_copy = coords; - MetaTexture meta{*sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}}; - values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); - } - - return values; -} - -Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - info.is_shadow = false; - const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info); - - const std::size_t type_coord_count = GetCoordCount(texture_type); - const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; - const bool aoffi_enabled = instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI); - - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // if is array gpr20 is used - const u64 coord_register = is_array ? 
instr.gpr20.Value() : instr.gpr8.Value(); - - const u64 last_coord_register = - ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array - ? static_cast<u64>(instr.gpr20.Value()) - : coord_register + 1; - - std::vector<Node> coords; - for (std::size_t i = 0; i < type_coord_count; ++i) { - const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); - coords.push_back( - GetRegister(last && !aoffi_enabled ? last_coord_register : coord_register + i)); - } - - const Node array = is_array ? GetRegister(array_register) : nullptr; - // When lod is used always is in gpr20 - const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); - - std::vector<Node> aoffi; - if (aoffi_enabled) { - aoffi = GetAoffiCoordinates(GetRegister(instr.gpr20), type_coord_count, false); - } - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - auto coords_copy = coords; - MetaTexture meta{*sampler, array, {}, aoffi, {}, {}, {}, lod, {}, element, {}}; - values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); - } - return values; -} - -std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( - TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled, - std::size_t max_coords, std::size_t max_inputs) { - const std::size_t coord_count = GetCoordCount(texture_type); - - std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0); - const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0); - if (total_coord_count > max_coords || total_reg_count > max_inputs) { - UNIMPLEMENTED_MSG("Unsupported Texture operation"); - total_coord_count = std::min(total_coord_count, max_coords); - } - // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later. - total_coord_count += - (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0; - - return {coord_count, total_coord_count}; -} - -std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, - bool is_tld4) { - const std::array coord_offsets = is_tld4 ? std::array{0U, 8U, 16U} : std::array{0U, 4U, 8U}; - const u32 size = is_tld4 ? 6 : 4; - const s32 wrap_value = is_tld4 ? 32 : 8; - const s32 diff_value = is_tld4 ? 64 : 16; - const u32 mask = (1U << size) - 1; - - std::vector<Node> aoffi; - aoffi.reserve(coord_count); - - const auto aoffi_immediate{ - TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()))}; - if (!aoffi_immediate) { - // Variable access, not supported on AMD. 
- LOG_WARNING(HW_GPU, - "AOFFI constant folding failed, some hardware might have graphical issues"); - for (std::size_t coord = 0; coord < coord_count; ++coord) { - const Node value = BitfieldExtract(aoffi_reg, coord_offsets[coord], size); - const Node condition = - Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value)); - const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value)); - aoffi.push_back(Operation(OperationCode::Select, condition, negative, value)); - } - return aoffi; - } - - for (std::size_t coord = 0; coord < coord_count; ++coord) { - s32 value = (*aoffi_immediate >> coord_offsets[coord]) & mask; - if (value >= wrap_value) { - value -= diff_value; - } - aoffi.push_back(Immediate(value)); - } - return aoffi; -} - -std::vector<Node> ShaderIR::GetPtpCoordinates(std::array<Node, 2> ptp_regs) { - static constexpr u32 num_entries = 8; - - std::vector<Node> ptp; - ptp.reserve(num_entries); - - const auto global_size = static_cast<s64>(global_code.size()); - const std::optional low = TrackImmediate(ptp_regs[0], global_code, global_size); - const std::optional high = TrackImmediate(ptp_regs[1], global_code, global_size); - if (!low || !high) { - for (u32 entry = 0; entry < num_entries; ++entry) { - const u32 reg = entry / 4; - const u32 offset = entry % 4; - const Node value = BitfieldExtract(ptp_regs[reg], offset * 8, 6); - const Node condition = - Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(32)); - const Node negative = Operation(OperationCode::IAdd, value, Immediate(-64)); - ptp.push_back(Operation(OperationCode::Select, condition, negative, value)); - } - return ptp; - } - - const u64 immediate = (static_cast<u64>(*high) << 32) | static_cast<u64>(*low); - for (u32 entry = 0; entry < num_entries; ++entry) { - s32 value = (immediate >> (entry * 8)) & 0b111111; - if (value >= 32) { - value -= 64; - } - ptp.push_back(Immediate(value)); - } - - return ptp; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp deleted file mode 100644 index 1c0957277..000000000 --- a/src/video_core/shader/decode/video.cpp +++ /dev/null @@ -1,169 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; -using Tegra::Shader::VideoType; -using Tegra::Shader::VmadShr; -using Tegra::Shader::VmnmxOperation; -using Tegra::Shader::VmnmxType; - -u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - if (opcode->get().GetId() == OpCode::Id::VMNMX) { - DecodeVMNMX(bb, instr); - return pc; - } - - const Node op_a = - GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a, - instr.video.type_a, instr.video.byte_height_a); - const Node op_b = [this, instr] { - if (instr.video.use_register_b) { - return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b, - instr.video.signed_b, instr.video.type_b, - instr.video.byte_height_b); - } - if (instr.video.signed_b) { - const auto imm = static_cast<s16>(instr.alu.GetImm20_16()); - return Immediate(static_cast<u32>(imm)); - } else { - return Immediate(instr.alu.GetImm20_16()); - } - }(); - - switch (opcode->get().GetId()) { - case OpCode::Id::VMAD: { - const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1; - const Node op_c = GetRegister(instr.gpr39); - - Node value = SignedOperation(OperationCode::IMul, result_signed, NO_PRECISE, op_a, op_b); - value = SignedOperation(OperationCode::IAdd, result_signed, NO_PRECISE, value, op_c); - - if (instr.vmad.shr == VmadShr::Shr7 || instr.vmad.shr == VmadShr::Shr15) { - const Node shift = Immediate(instr.vmad.shr == VmadShr::Shr7 ? 7 : 15); - value = - SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, value, shift); - } - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::VSETP: { - // We can't use the constant predicate as destination. 
- ASSERT(instr.vsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); - - const bool sign = instr.video.signed_a == 1 || instr.video.signed_b == 1; - const Node first_pred = GetPredicateComparisonInteger(instr.vsetp.cond, sign, op_a, op_b); - const Node second_pred = GetPredicate(instr.vsetp.pred39, false); - - const OperationCode combiner = GetPredicateCombiner(instr.vsetp.op); - - // Set the primary predicate to the result of Predicate OP SecondPredicate - SetPredicate(bb, instr.vsetp.pred3, Operation(combiner, first_pred, second_pred)); - - if (instr.vsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, - // if enabled - const Node negate_pred = Operation(OperationCode::LogicalNegate, first_pred); - SetPredicate(bb, instr.vsetp.pred0, Operation(combiner, negate_pred, second_pred)); - } - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled video instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, VideoType type, - u64 byte_height) { - if (!is_chunk) { - return BitfieldExtract(op, static_cast<u32>(byte_height * 8), 8); - } - - switch (type) { - case VideoType::Size16_Low: - return BitfieldExtract(op, 0, 16); - case VideoType::Size16_High: - return BitfieldExtract(op, 16, 16); - case VideoType::Size32: - // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used - // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort. - UNIMPLEMENTED(); - return Immediate(0); - case VideoType::Invalid: - UNREACHABLE_MSG("Invalid instruction encoding"); - return Immediate(0); - default: - UNREACHABLE(); - return Immediate(0); - } -} - -void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) { - UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register); - UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32); - UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32); - UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed); - UNIMPLEMENTED_IF(instr.vmnmx.sat); - UNIMPLEMENTED_IF(instr.generates_cc); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = GetRegister(instr.gpr20); - Node op_c = GetRegister(instr.gpr39); - - const bool is_oper1_signed = instr.vmnmx.is_src_a_signed; // Stubbed - const bool is_oper2_signed = instr.vmnmx.is_dest_signed; - - const auto operation_a = instr.vmnmx.mx ? 
OperationCode::IMax : OperationCode::IMin; - Node value = SignedOperation(operation_a, is_oper1_signed, move(op_a), move(op_b)); - - switch (instr.vmnmx.operation) { - case VmnmxOperation::Mrg_16H: - value = BitfieldInsert(move(op_c), move(value), 16, 16); - break; - case VmnmxOperation::Mrg_16L: - value = BitfieldInsert(move(op_c), move(value), 0, 16); - break; - case VmnmxOperation::Mrg_8B0: - value = BitfieldInsert(move(op_c), move(value), 0, 8); - break; - case VmnmxOperation::Mrg_8B2: - value = BitfieldInsert(move(op_c), move(value), 16, 8); - break; - case VmnmxOperation::Acc: - value = Operation(OperationCode::IAdd, move(value), move(op_c)); - break; - case VmnmxOperation::Min: - value = SignedOperation(OperationCode::IMin, is_oper2_signed, move(value), move(op_c)); - break; - case VmnmxOperation::Max: - value = SignedOperation(OperationCode::IMax, is_oper2_signed, move(value), move(op_c)); - break; - case VmnmxOperation::Nop: - break; - default: - UNREACHABLE(); - break; - } - - SetRegister(bb, instr.gpr0, move(value)); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp deleted file mode 100644 index 37433d783..000000000 --- a/src/video_core/shader/decode/warp.cpp +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; -using Tegra::Shader::ShuffleOperation; -using Tegra::Shader::VoteOperation; - -namespace { - -OperationCode GetOperationCode(VoteOperation vote_op) { - switch (vote_op) { - case VoteOperation::All: - return OperationCode::VoteAll; - case VoteOperation::Any: - return OperationCode::VoteAny; - case VoteOperation::Eq: - return OperationCode::VoteEqual; - default: - UNREACHABLE_MSG("Invalid vote operation={}", vote_op); - return OperationCode::VoteAll; - } -} - -} // Anonymous namespace - -u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - // Signal the backend that this shader uses warp instructions. - uses_warps = true; - - switch (opcode->get().GetId()) { - case OpCode::Id::VOTE: { - const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0); - const Node active = Operation(OperationCode::BallotThread, value); - const Node vote = Operation(GetOperationCode(instr.vote.operation), value); - SetRegister(bb, instr.gpr0, active); - SetPredicate(bb, instr.vote.dest_pred, vote); - break; - } - case OpCode::Id::SHFL: { - Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) - : GetRegister(instr.gpr39); - Node index = instr.shfl.is_index_imm ? 
Immediate(static_cast<u32>(instr.shfl.index_imm)) - : GetRegister(instr.gpr20); - - Node thread_id = Operation(OperationCode::ThreadId); - Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU)); - Node seg_mask = BitfieldExtract(mask, 8, 16); - - Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask); - Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask); - Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id, - Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask)); - - Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] { - switch (instr.shfl.operation) { - case ShuffleOperation::Idx: - return Operation(OperationCode::IBitwiseOr, - Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask), - min_thread_id); - case ShuffleOperation::Down: - return Operation(OperationCode::IAdd, thread_id, index); - case ShuffleOperation::Up: - return Operation(OperationCode::IAdd, thread_id, - Operation(OperationCode::INegate, index)); - case ShuffleOperation::Bfly: - return Operation(OperationCode::IBitwiseXor, thread_id, index); - } - UNREACHABLE(); - return Immediate(0U); - }(); - - Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] { - if (instr.shfl.operation == ShuffleOperation::Up) { - return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id); - } else { - return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id); - } - }(); - - SetPredicate(bb, instr.shfl.pred48, in_bounds); - SetRegister( - bb, instr.gpr0, - Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id)); - break; - } - case OpCode::Id::FSWZADD: { - UNIMPLEMENTED_IF(instr.fswzadd.ndv); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = GetRegister(instr.gpr20); - Node mask = Immediate(static_cast<u32>(instr.fswzadd.swizzle)); - SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask)); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName()); - break; - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp deleted file mode 100644 index 233b8fa42..000000000 --- a/src/video_core/shader/decode/xmad.cpp +++ /dev/null @@ -1,156 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::PredCondition; - -u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - UNIMPLEMENTED_IF(instr.xmad.sign_a); - UNIMPLEMENTED_IF(instr.xmad.sign_b); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in XMAD is not implemented"); - - Node op_a = GetRegister(instr.gpr8); - - // TODO(bunnei): Needs to be fixed once op_a or op_b is signed - UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b); - const bool is_signed_a = instr.xmad.sign_a == 1; - const bool is_signed_b = instr.xmad.sign_b == 1; - const bool is_signed_c = is_signed_a; - - auto [is_merge, is_psl, is_high_b, mode, op_b_binding, - op_c] = [&]() -> std::tuple<bool, bool, bool, Tegra::Shader::XmadMode, Node, Node> { - switch (opcode->get().GetId()) { - case OpCode::Id::XMAD_CR: - return {instr.xmad.merge_56, - instr.xmad.product_shift_left_second, - instr.xmad.high_b, - instr.xmad.mode_cbf, - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), - GetRegister(instr.gpr39)}; - case OpCode::Id::XMAD_RR: - return {instr.xmad.merge_37, instr.xmad.product_shift_left, instr.xmad.high_b_rr, - instr.xmad.mode, GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; - case OpCode::Id::XMAD_RC: - return {false, - false, - instr.xmad.high_b, - instr.xmad.mode_cbf, - GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - case OpCode::Id::XMAD_IMM: - return {instr.xmad.merge_37, - instr.xmad.product_shift_left, - false, - instr.xmad.mode, - Immediate(static_cast<u32>(instr.xmad.imm20_16)), - GetRegister(instr.gpr39)}; - default: - UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName()); - return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)}; - } - }(); - - op_a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(op_a), - instr.xmad.high_a ? Immediate(16) : Immediate(0), Immediate(16)); - - const Node original_b = op_b_binding; - const Node op_b = - SignedOperation(OperationCode::IBitfieldExtract, is_signed_b, std::move(op_b_binding), - is_high_b ? Immediate(16) : Immediate(0), Immediate(16)); - - // we already check sign_a and sign_b is difference or not before so just use one in here. 
- Node product = SignedOperation(OperationCode::IMul, is_signed_a, op_a, op_b); - if (is_psl) { - product = - SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_a, product, Immediate(16)); - } - SetTemporary(bb, 0, product); - product = GetTemporary(0); - - Node original_c = op_c; - const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error - op_c = [&] { - switch (set_mode) { - case Tegra::Shader::XmadMode::None: - return original_c; - case Tegra::Shader::XmadMode::CLo: - return BitfieldExtract(std::move(original_c), 0, 16); - case Tegra::Shader::XmadMode::CHi: - return BitfieldExtract(std::move(original_c), 16, 16); - case Tegra::Shader::XmadMode::CBcc: { - Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, - original_b, Immediate(16)); - return SignedOperation(OperationCode::IAdd, is_signed_c, std::move(original_c), - std::move(shifted_b)); - } - case Tegra::Shader::XmadMode::CSfu: { - const Node comp_a = - GetPredicateComparisonInteger(PredCondition::EQ, is_signed_a, op_a, Immediate(0)); - const Node comp_b = - GetPredicateComparisonInteger(PredCondition::EQ, is_signed_b, op_b, Immediate(0)); - const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b); - - const Node comp_minus_a = GetPredicateComparisonInteger( - PredCondition::NE, is_signed_a, - SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a, - Immediate(0x80000000)), - Immediate(0)); - const Node comp_minus_b = GetPredicateComparisonInteger( - PredCondition::NE, is_signed_b, - SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b, - Immediate(0x80000000)), - Immediate(0)); - - Node new_c = Operation( - OperationCode::Select, comp_minus_a, - SignedOperation(OperationCode::IAdd, is_signed_c, original_c, Immediate(-65536)), - original_c); - new_c = Operation( - OperationCode::Select, comp_minus_b, - SignedOperation(OperationCode::IAdd, is_signed_c, new_c, Immediate(-65536)), - std::move(new_c)); - - return Operation(OperationCode::Select, comp, original_c, std::move(new_c)); - } - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - SetTemporary(bb, 1, op_c); - op_c = GetTemporary(1); - - // TODO(Rodrigo): Use an appropiate sign for this operation - Node sum = SignedOperation(OperationCode::IAdd, is_signed_a, product, std::move(op_c)); - SetTemporary(bb, 2, sum); - sum = GetTemporary(2); - if (is_merge) { - const Node a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(sum), - Immediate(0), Immediate(16)); - const Node b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, original_b, - Immediate(16)); - sum = SignedOperation(OperationCode::IBitwiseOr, is_signed_a, a, b); - } - - SetInternalFlagsFromInteger(bb, sum, instr.generates_cc); - SetRegister(bb, instr.gpr0, std::move(sum)); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/expr.cpp b/src/video_core/shader/expr.cpp deleted file mode 100644 index 2647865d4..000000000 --- a/src/video_core/shader/expr.cpp +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include <memory> -#include <variant> - -#include "video_core/shader/expr.h" - -namespace VideoCommon::Shader { -namespace { -bool ExprIsBoolean(const Expr& expr) { - return std::holds_alternative<ExprBoolean>(*expr); -} - -bool ExprBooleanGet(const Expr& expr) { - return std::get_if<ExprBoolean>(expr.get())->value; -} -} // Anonymous namespace - -bool ExprAnd::operator==(const ExprAnd& b) const { - return (*operand1 == *b.operand1) && (*operand2 == *b.operand2); -} - -bool ExprAnd::operator!=(const ExprAnd& b) const { - return !operator==(b); -} - -bool ExprOr::operator==(const ExprOr& b) const { - return (*operand1 == *b.operand1) && (*operand2 == *b.operand2); -} - -bool ExprOr::operator!=(const ExprOr& b) const { - return !operator==(b); -} - -bool ExprNot::operator==(const ExprNot& b) const { - return *operand1 == *b.operand1; -} - -bool ExprNot::operator!=(const ExprNot& b) const { - return !operator==(b); -} - -Expr MakeExprNot(Expr first) { - if (std::holds_alternative<ExprNot>(*first)) { - return std::get_if<ExprNot>(first.get())->operand1; - } - return MakeExpr<ExprNot>(std::move(first)); -} - -Expr MakeExprAnd(Expr first, Expr second) { - if (ExprIsBoolean(first)) { - return ExprBooleanGet(first) ? second : first; - } - if (ExprIsBoolean(second)) { - return ExprBooleanGet(second) ? first : second; - } - return MakeExpr<ExprAnd>(std::move(first), std::move(second)); -} - -Expr MakeExprOr(Expr first, Expr second) { - if (ExprIsBoolean(first)) { - return ExprBooleanGet(first) ? first : second; - } - if (ExprIsBoolean(second)) { - return ExprBooleanGet(second) ? second : first; - } - return MakeExpr<ExprOr>(std::move(first), std::move(second)); -} - -bool ExprAreEqual(const Expr& first, const Expr& second) { - return (*first) == (*second); -} - -bool ExprAreOpposite(const Expr& first, const Expr& second) { - if (std::holds_alternative<ExprNot>(*first)) { - return ExprAreEqual(std::get_if<ExprNot>(first.get())->operand1, second); - } - if (std::holds_alternative<ExprNot>(*second)) { - return ExprAreEqual(std::get_if<ExprNot>(second.get())->operand1, first); - } - return false; -} - -bool ExprIsTrue(const Expr& first) { - if (ExprIsBoolean(first)) { - return ExprBooleanGet(first); - } - return false; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h deleted file mode 100644 index cda284c72..000000000 --- a/src/video_core/shader/expr.h +++ /dev/null @@ -1,156 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include <memory> -#include <variant> - -#include "video_core/engines/shader_bytecode.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::ConditionCode; -using Tegra::Shader::Pred; - -class ExprAnd; -class ExprBoolean; -class ExprCondCode; -class ExprGprEqual; -class ExprNot; -class ExprOr; -class ExprPredicate; -class ExprVar; - -using ExprData = std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd, - ExprBoolean, ExprGprEqual>; -using Expr = std::shared_ptr<ExprData>; - -class ExprAnd final { -public: - explicit ExprAnd(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {} - - bool operator==(const ExprAnd& b) const; - bool operator!=(const ExprAnd& b) const; - - Expr operand1; - Expr operand2; -}; - -class ExprOr final { -public: - explicit ExprOr(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {} - - bool operator==(const ExprOr& b) const; - bool operator!=(const ExprOr& b) const; - - Expr operand1; - Expr operand2; -}; - -class ExprNot final { -public: - explicit ExprNot(Expr a) : operand1{std::move(a)} {} - - bool operator==(const ExprNot& b) const; - bool operator!=(const ExprNot& b) const; - - Expr operand1; -}; - -class ExprVar final { -public: - explicit ExprVar(u32 index) : var_index{index} {} - - bool operator==(const ExprVar& b) const { - return var_index == b.var_index; - } - - bool operator!=(const ExprVar& b) const { - return !operator==(b); - } - - u32 var_index; -}; - -class ExprPredicate final { -public: - explicit ExprPredicate(u32 predicate_) : predicate{predicate_} {} - - bool operator==(const ExprPredicate& b) const { - return predicate == b.predicate; - } - - bool operator!=(const ExprPredicate& b) const { - return !operator==(b); - } - - u32 predicate; -}; - -class ExprCondCode final { -public: - explicit ExprCondCode(ConditionCode condition_code) : cc{condition_code} {} - - bool operator==(const ExprCondCode& b) const { - return cc == b.cc; - } - - bool operator!=(const ExprCondCode& b) const { - return !operator==(b); - } - - ConditionCode cc; -}; - -class ExprBoolean final { -public: - explicit ExprBoolean(bool val) : value{val} {} - - bool operator==(const ExprBoolean& b) const { - return value == b.value; - } - - bool operator!=(const ExprBoolean& b) const { - return !operator==(b); - } - - bool value; -}; - -class ExprGprEqual final { -public: - explicit ExprGprEqual(u32 gpr_, u32 value_) : gpr{gpr_}, value{value_} {} - - bool operator==(const ExprGprEqual& b) const { - return gpr == b.gpr && value == b.value; - } - - bool operator!=(const ExprGprEqual& b) const { - return !operator==(b); - } - - u32 gpr; - u32 value; -}; - -template <typename T, typename... Args> -Expr MakeExpr(Args&&... 
args) { - static_assert(std::is_convertible_v<T, ExprData>); - return std::make_shared<ExprData>(T(std::forward<Args>(args)...)); -} - -bool ExprAreEqual(const Expr& first, const Expr& second); - -bool ExprAreOpposite(const Expr& first, const Expr& second); - -Expr MakeExprNot(Expr first); - -Expr MakeExprAnd(Expr first, Expr second); - -Expr MakeExprOr(Expr first, Expr second); - -bool ExprIsTrue(const Expr& first); - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp deleted file mode 100644 index e18ccba8e..000000000 --- a/src/video_core/shader/memory_util.cpp +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <cstddef> - -#include <boost/container_hash/hash.hpp> - -#include "common/common_types.h" -#include "core/core.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/memory_manager.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) { - const auto& shader_config{maxwell3d.regs.shader_config[static_cast<std::size_t>(program)]}; - return maxwell3d.regs.code_address.CodeAddress() + shader_config.offset; -} - -bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { - // Sched instructions appear once every 4 instructions. - constexpr std::size_t SchedPeriod = 4; - const std::size_t absolute_offset = offset - main_offset; - return (absolute_offset % SchedPeriod) == 0; -} - -std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) { - // This is the encoded version of BRA that jumps to itself. All Nvidia - // shaders end with one. - static constexpr u64 SELF_JUMPING_BRANCH = 0xE2400FFFFF07000FULL; - static constexpr u64 MASK = 0xFFFFFFFFFF7FFFFFULL; - - const std::size_t start_offset = is_compute ? 
KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; - std::size_t offset = start_offset; - while (offset < program.size()) { - const u64 instruction = program[offset]; - if (!IsSchedInstruction(offset, start_offset)) { - if ((instruction & MASK) == SELF_JUMPING_BRANCH) { - // End on Maxwell's "nop" instruction - break; - } - if (instruction == 0) { - break; - } - } - ++offset; - } - // The last instruction is included in the program size - return std::min(offset + 1, program.size()); -} - -ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr, - const u8* host_ptr, bool is_compute) { - ProgramCode code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); - ASSERT_OR_EXECUTE(host_ptr != nullptr, { return code; }); - memory_manager.ReadBlockUnsafe(gpu_addr, code.data(), code.size() * sizeof(u64)); - code.resize(CalculateProgramSize(code, is_compute)); - return code; -} - -u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code, - const ProgramCode& code_b) { - size_t unique_identifier = boost::hash_value(code); - if (is_a) { - // VertexA programs include two programs - boost::hash_combine(unique_identifier, boost::hash_value(code_b)); - } - return static_cast<u64>(unique_identifier); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/memory_util.h b/src/video_core/shader/memory_util.h deleted file mode 100644 index 4624d38e6..000000000 --- a/src/video_core/shader/memory_util.h +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <cstddef> -#include <vector> - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" - -namespace Tegra { -class MemoryManager; -} - -namespace VideoCommon::Shader { - -using ProgramCode = std::vector<u64>; - -constexpr u32 STAGE_MAIN_OFFSET = 10; -constexpr u32 KERNEL_MAIN_OFFSET = 0; - -/// Gets the address for the specified shader stage program -GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::Engines::Maxwell3D::Regs::ShaderProgram program); - -/// Gets if the current instruction offset is a scheduler instruction -bool IsSchedInstruction(std::size_t offset, std::size_t main_offset); - -/// Calculates the size of a program stream -std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute); - -/// Gets the shader program code from memory for the specified address -ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr, - const u8* host_ptr, bool is_compute); - -/// Hashes one (or two) program streams -u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code, - const ProgramCode& code_b = {}); - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h deleted file mode 100644 index b54d33763..000000000 --- a/src/video_core/shader/node.h +++ /dev/null @@ -1,701 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include <array> -#include <cstddef> -#include <memory> -#include <optional> -#include <string> -#include <tuple> -#include <utility> -#include <variant> -#include <vector> - -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" - -namespace VideoCommon::Shader { - -enum class OperationCode { - Assign, /// (float& dest, float src) -> void - - Select, /// (MetaArithmetic, bool pred, float a, float b) -> float - - FAdd, /// (MetaArithmetic, float a, float b) -> float - FMul, /// (MetaArithmetic, float a, float b) -> float - FDiv, /// (MetaArithmetic, float a, float b) -> float - FFma, /// (MetaArithmetic, float a, float b, float c) -> float - FNegate, /// (MetaArithmetic, float a) -> float - FAbsolute, /// (MetaArithmetic, float a) -> float - FClamp, /// (MetaArithmetic, float value, float min, float max) -> float - FCastHalf0, /// (MetaArithmetic, f16vec2 a) -> float - FCastHalf1, /// (MetaArithmetic, f16vec2 a) -> float - FMin, /// (MetaArithmetic, float a, float b) -> float - FMax, /// (MetaArithmetic, float a, float b) -> float - FCos, /// (MetaArithmetic, float a) -> float - FSin, /// (MetaArithmetic, float a) -> float - FExp2, /// (MetaArithmetic, float a) -> float - FLog2, /// (MetaArithmetic, float a) -> float - FInverseSqrt, /// (MetaArithmetic, float a) -> float - FSqrt, /// (MetaArithmetic, float a) -> float - FRoundEven, /// (MetaArithmetic, float a) -> float - FFloor, /// (MetaArithmetic, float a) -> float - FCeil, /// (MetaArithmetic, float a) -> float - FTrunc, /// (MetaArithmetic, float a) -> float - FCastInteger, /// (MetaArithmetic, int a) -> float - FCastUInteger, /// (MetaArithmetic, uint a) -> float - FSwizzleAdd, /// (float a, float b, uint mask) -> float - - IAdd, /// (MetaArithmetic, int a, int b) -> int - IMul, /// (MetaArithmetic, int a, int b) -> int - IDiv, /// (MetaArithmetic, int a, int b) -> int - INegate, /// (MetaArithmetic, int a) -> int - IAbsolute, /// (MetaArithmetic, int a) -> int - IMin, /// (MetaArithmetic, int a, int b) -> int - IMax, /// (MetaArithmetic, int a, int b) -> int - ICastFloat, /// (MetaArithmetic, float a) -> int - ICastUnsigned, /// (MetaArithmetic, uint a) -> int - ILogicalShiftLeft, /// (MetaArithmetic, int a, uint b) -> int - ILogicalShiftRight, /// (MetaArithmetic, int a, uint b) -> int - IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int - IBitwiseAnd, /// (MetaArithmetic, int a, int b) -> int - IBitwiseOr, /// (MetaArithmetic, int a, int b) -> int - IBitwiseXor, /// (MetaArithmetic, int a, int b) -> int - IBitwiseNot, /// (MetaArithmetic, int a) -> int - IBitfieldInsert, /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int - IBitfieldExtract, /// (MetaArithmetic, int value, int offset, int offset) -> int - IBitCount, /// (MetaArithmetic, int) -> int - IBitMSB, /// (MetaArithmetic, int) -> int - - UAdd, /// (MetaArithmetic, uint a, uint b) -> uint - UMul, /// (MetaArithmetic, uint a, uint b) -> uint - UDiv, /// (MetaArithmetic, uint a, uint b) -> uint - UMin, /// (MetaArithmetic, uint a, uint b) -> uint - UMax, /// (MetaArithmetic, uint a, uint b) -> uint - UCastFloat, /// (MetaArithmetic, float a) -> uint - UCastSigned, /// (MetaArithmetic, int a) -> uint - ULogicalShiftLeft, /// (MetaArithmetic, uint a, uint b) -> uint - ULogicalShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint - UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseAnd, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseOr, /// (MetaArithmetic, 
uint a, uint b) -> uint - UBitwiseXor, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseNot, /// (MetaArithmetic, uint a) -> uint - UBitfieldInsert, /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint - UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint - UBitCount, /// (MetaArithmetic, uint) -> uint - UBitMSB, /// (MetaArithmetic, uint) -> uint - - HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 - HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 - HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 - HAbsolute, /// (f16vec2 a) -> f16vec2 - HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 - HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 - HCastFloat, /// (MetaArithmetic, float a) -> f16vec2 - HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 - HMergeF32, /// (f16vec2 src) -> float - HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 - HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 - HPack2, /// (float a, float b) -> f16vec2 - - LogicalAssign, /// (bool& dst, bool src) -> void - LogicalAnd, /// (bool a, bool b) -> bool - LogicalOr, /// (bool a, bool b) -> bool - LogicalXor, /// (bool a, bool b) -> bool - LogicalNegate, /// (bool a) -> bool - LogicalPick2, /// (bool2 pair, uint index) -> bool - LogicalAnd2, /// (bool2 a) -> bool - - LogicalFOrdLessThan, /// (float a, float b) -> bool - LogicalFOrdEqual, /// (float a, float b) -> bool - LogicalFOrdLessEqual, /// (float a, float b) -> bool - LogicalFOrdGreaterThan, /// (float a, float b) -> bool - LogicalFOrdNotEqual, /// (float a, float b) -> bool - LogicalFOrdGreaterEqual, /// (float a, float b) -> bool - LogicalFOrdered, /// (float a, float b) -> bool - LogicalFUnordered, /// (float a, float b) -> bool - LogicalFUnordLessThan, /// (float a, float b) -> bool - LogicalFUnordEqual, /// (float a, float b) -> bool - LogicalFUnordLessEqual, /// (float a, float b) -> bool - LogicalFUnordGreaterThan, /// (float a, float b) -> bool - LogicalFUnordNotEqual, /// (float a, float b) -> bool - LogicalFUnordGreaterEqual, /// (float a, float b) -> bool - - LogicalILessThan, /// (int a, int b) -> bool - LogicalIEqual, /// (int a, int b) -> bool - LogicalILessEqual, /// (int a, int b) -> bool - LogicalIGreaterThan, /// (int a, int b) -> bool - LogicalINotEqual, /// (int a, int b) -> bool - LogicalIGreaterEqual, /// (int a, int b) -> bool - - LogicalULessThan, /// (uint a, uint b) -> bool - LogicalUEqual, /// (uint a, uint b) -> bool - LogicalULessEqual, /// (uint a, uint b) -> bool - LogicalUGreaterThan, /// (uint a, uint b) -> bool - LogicalUNotEqual, /// (uint a, uint b) -> bool - LogicalUGreaterEqual, /// (uint a, uint b) -> bool - - LogicalAddCarry, /// (uint a, uint b) -> bool - - Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HLessThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HLessEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterThanWithNan, /// 
(MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HNotEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - - Texture, /// (MetaTexture, float[N] coords) -> float4 - TextureLod, /// (MetaTexture, float[N] coords) -> float4 - TextureGather, /// (MetaTexture, float[N] coords) -> float4 - TextureQueryDimensions, /// (MetaTexture, float a) -> float4 - TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 - TexelFetch, /// (MetaTexture, int[N], int) -> float4 - TextureGradient, /// (MetaTexture, float[N] coords, float[N*2] derivates) -> float4 - - ImageLoad, /// (MetaImage, int[N] coords) -> void - ImageStore, /// (MetaImage, int[N] coords) -> void - - AtomicImageAdd, /// (MetaImage, int[N] coords) -> void - AtomicImageAnd, /// (MetaImage, int[N] coords) -> void - AtomicImageOr, /// (MetaImage, int[N] coords) -> void - AtomicImageXor, /// (MetaImage, int[N] coords) -> void - AtomicImageExchange, /// (MetaImage, int[N] coords) -> void - - AtomicUExchange, /// (memory, uint) -> uint - AtomicUAdd, /// (memory, uint) -> uint - AtomicUMin, /// (memory, uint) -> uint - AtomicUMax, /// (memory, uint) -> uint - AtomicUAnd, /// (memory, uint) -> uint - AtomicUOr, /// (memory, uint) -> uint - AtomicUXor, /// (memory, uint) -> uint - - AtomicIExchange, /// (memory, int) -> int - AtomicIAdd, /// (memory, int) -> int - AtomicIMin, /// (memory, int) -> int - AtomicIMax, /// (memory, int) -> int - AtomicIAnd, /// (memory, int) -> int - AtomicIOr, /// (memory, int) -> int - AtomicIXor, /// (memory, int) -> int - - ReduceUAdd, /// (memory, uint) -> void - ReduceUMin, /// (memory, uint) -> void - ReduceUMax, /// (memory, uint) -> void - ReduceUAnd, /// (memory, uint) -> void - ReduceUOr, /// (memory, uint) -> void - ReduceUXor, /// (memory, uint) -> void - - ReduceIAdd, /// (memory, int) -> void - ReduceIMin, /// (memory, int) -> void - ReduceIMax, /// (memory, int) -> void - ReduceIAnd, /// (memory, int) -> void - ReduceIOr, /// (memory, int) -> void - ReduceIXor, /// (memory, int) -> void - - Branch, /// (uint branch_target) -> void - BranchIndirect, /// (uint branch_target) -> void - PushFlowStack, /// (uint branch_target) -> void - PopFlowStack, /// () -> void - Exit, /// () -> void - Discard, /// () -> void - - EmitVertex, /// () -> void - EndPrimitive, /// () -> void - - InvocationId, /// () -> int - YNegate, /// () -> float - LocalInvocationIdX, /// () -> uint - LocalInvocationIdY, /// () -> uint - LocalInvocationIdZ, /// () -> uint - WorkGroupIdX, /// () -> uint - WorkGroupIdY, /// () -> uint - WorkGroupIdZ, /// () -> uint - - BallotThread, /// (bool) -> uint - VoteAll, /// (bool) -> bool - VoteAny, /// (bool) -> bool - VoteEqual, /// (bool) -> bool - - ThreadId, /// () -> uint - ThreadEqMask, /// () -> uint - ThreadGeMask, /// () -> uint - ThreadGtMask, /// () -> uint - ThreadLeMask, /// () -> uint - ThreadLtMask, /// () -> uint - ShuffleIndexed, /// (uint value, uint index) -> uint - - Barrier, /// () -> void - MemoryBarrierGroup, /// () -> void - MemoryBarrierGlobal, /// () -> void - - Amount, -}; - -enum class InternalFlag { - Zero = 0, - Sign = 1, - Carry = 2, - Overflow = 3, - Amount = 4, -}; - -enum class MetaStackClass { - Ssy, - Pbk, -}; - -class OperationNode; -class ConditionalNode; -class GprNode; -class CustomVarNode; -class ImmediateNode; -class InternalFlagNode; -class PredicateNode; -class AbufNode; -class CbufNode; -class LmemNode; -class PatchNode; -class SmemNode; 
-class GmemNode; -class CommentNode; - -using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, CustomVarNode, ImmediateNode, - InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode, - LmemNode, SmemNode, GmemNode, CommentNode>; -using Node = std::shared_ptr<NodeData>; -using Node4 = std::array<Node, 4>; -using NodeBlock = std::vector<Node>; - -struct ArraySamplerNode; -struct BindlessSamplerNode; -struct SeparateSamplerNode; - -using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>; -using TrackSampler = std::shared_ptr<TrackSamplerData>; - -struct SamplerEntry { - /// Bound samplers constructor - explicit SamplerEntry(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, bool is_array_, - bool is_shadow_, bool is_buffer_, bool is_indexed_) - : index{index_}, offset{offset_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_}, - is_buffer{is_buffer_}, is_indexed{is_indexed_} {} - - /// Separate sampler constructor - explicit SamplerEntry(u32 index_, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers, - Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_, - bool is_buffer_) - : index{index_}, offset{offsets.first}, secondary_offset{offsets.second}, - buffer{buffers.first}, secondary_buffer{buffers.second}, type{type_}, is_array{is_array_}, - is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {} - - /// Bindless samplers constructor - explicit SamplerEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::TextureType type_, - bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_) - : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_array{is_array_}, - is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} { - } - - u32 index = 0; ///< Emulated index given for the this sampler. - u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read. - u32 secondary_offset = 0; ///< Secondary offset in the const buffer. - u32 buffer = 0; ///< Buffer where the bindless sampler is read. - u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read. - u32 size = 1; ///< Size of the sampler. - - Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) - bool is_array = false; ///< Whether the texture is being sampled as an array texture or not. - bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not. - bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler. - bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not. - bool is_indexed = false; ///< Whether this sampler is an indexed array of textures. - bool is_separated = false; ///< Whether the image and sampler is separated or not. 
-}; - -/// Represents a tracked bindless sampler into a direct const buffer -struct ArraySamplerNode { - u32 index; - u32 base_offset; - u32 bindless_var; -}; - -/// Represents a tracked separate sampler image pair that was folded statically -struct SeparateSamplerNode { - std::pair<u32, u32> indices; - std::pair<u32, u32> offsets; -}; - -/// Represents a tracked bindless sampler into a direct const buffer -struct BindlessSamplerNode { - u32 index; - u32 offset; -}; - -struct ImageEntry { -public: - /// Bound images constructor - explicit ImageEntry(u32 index_, u32 offset_, Tegra::Shader::ImageType type_) - : index{index_}, offset{offset_}, type{type_} {} - - /// Bindless samplers constructor - explicit ImageEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_) - : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {} - - void MarkWrite() { - is_written = true; - } - - void MarkRead() { - is_read = true; - } - - void MarkAtomic() { - MarkWrite(); - MarkRead(); - is_atomic = true; - } - - u32 index = 0; - u32 offset = 0; - u32 buffer = 0; - - Tegra::Shader::ImageType type{}; - bool is_bindless = false; - bool is_written = false; - bool is_read = false; - bool is_atomic = false; -}; - -struct GlobalMemoryBase { - u32 cbuf_index = 0; - u32 cbuf_offset = 0; - - [[nodiscard]] bool operator<(const GlobalMemoryBase& rhs) const { - return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset); - } -}; - -/// Parameters describing an arithmetic operation -struct MetaArithmetic { - bool precise{}; ///< Whether the operation can be constraint or not -}; - -/// Parameters describing a texture sampler -struct MetaTexture { - SamplerEntry sampler; - Node array; - Node depth_compare; - std::vector<Node> aoffi; - std::vector<Node> ptp; - std::vector<Node> derivates; - Node bias; - Node lod; - Node component; - u32 element{}; - Node index; -}; - -struct MetaImage { - const ImageEntry& image; - std::vector<Node> values; - u32 element{}; -}; - -/// Parameters that modify an operation but are not part of any particular operand -using Meta = - std::variant<MetaArithmetic, MetaTexture, MetaImage, MetaStackClass, Tegra::Shader::HalfType>; - -class AmendNode { -public: - [[nodiscard]] std::optional<std::size_t> GetAmendIndex() const { - if (amend_index == amend_null_index) { - return std::nullopt; - } - return {amend_index}; - } - - void SetAmendIndex(std::size_t index) { - amend_index = index; - } - - void ClearAmend() { - amend_index = amend_null_index; - } - -private: - static constexpr std::size_t amend_null_index = 0xFFFFFFFFFFFFFFFFULL; - std::size_t amend_index{amend_null_index}; -}; - -/// Holds any kind of operation that can be done in the IR -class OperationNode final : public AmendNode { -public: - explicit OperationNode(OperationCode code_) : OperationNode(code_, Meta{}) {} - - explicit OperationNode(OperationCode code_, Meta meta_) - : OperationNode(code_, std::move(meta_), std::vector<Node>{}) {} - - explicit OperationNode(OperationCode code_, std::vector<Node> operands_) - : OperationNode(code_, Meta{}, std::move(operands_)) {} - - explicit OperationNode(OperationCode code_, Meta meta_, std::vector<Node> operands_) - : code{code_}, meta{std::move(meta_)}, operands{std::move(operands_)} {} - - template <typename... Args> - explicit OperationNode(OperationCode code_, Meta meta_, Args&&... 
operands_) - : code{code_}, meta{std::move(meta_)}, operands{operands_...} {} - - [[nodiscard]] OperationCode GetCode() const { - return code; - } - - [[nodiscard]] const Meta& GetMeta() const { - return meta; - } - - [[nodiscard]] std::size_t GetOperandsCount() const { - return operands.size(); - } - - [[nodiscard]] const Node& operator[](std::size_t operand_index) const { - return operands.at(operand_index); - } - -private: - OperationCode code{}; - Meta meta{}; - std::vector<Node> operands; -}; - -/// Encloses inside any kind of node that returns a boolean conditionally-executed code -class ConditionalNode final : public AmendNode { -public: - explicit ConditionalNode(Node condition_, std::vector<Node>&& code_) - : condition{std::move(condition_)}, code{std::move(code_)} {} - - [[nodiscard]] const Node& GetCondition() const { - return condition; - } - - [[nodiscard]] const std::vector<Node>& GetCode() const { - return code; - } - -private: - Node condition; ///< Condition to be satisfied - std::vector<Node> code; ///< Code to execute -}; - -/// A general purpose register -class GprNode final { -public: - explicit constexpr GprNode(Tegra::Shader::Register index_) : index{index_} {} - - [[nodiscard]] constexpr u32 GetIndex() const { - return static_cast<u32>(index); - } - -private: - Tegra::Shader::Register index{}; -}; - -/// A custom variable -class CustomVarNode final { -public: - explicit constexpr CustomVarNode(u32 index_) : index{index_} {} - - [[nodiscard]] constexpr u32 GetIndex() const { - return index; - } - -private: - u32 index{}; -}; - -/// A 32-bits value that represents an immediate value -class ImmediateNode final { -public: - explicit constexpr ImmediateNode(u32 value_) : value{value_} {} - - [[nodiscard]] constexpr u32 GetValue() const { - return value; - } - -private: - u32 value{}; -}; - -/// One of Maxwell's internal flags -class InternalFlagNode final { -public: - explicit constexpr InternalFlagNode(InternalFlag flag_) : flag{flag_} {} - - [[nodiscard]] constexpr InternalFlag GetFlag() const { - return flag; - } - -private: - InternalFlag flag{}; -}; - -/// A predicate register, it can be negated without additional nodes -class PredicateNode final { -public: - explicit constexpr PredicateNode(Tegra::Shader::Pred index_, bool negated_) - : index{index_}, negated{negated_} {} - - [[nodiscard]] constexpr Tegra::Shader::Pred GetIndex() const { - return index; - } - - [[nodiscard]] constexpr bool IsNegated() const { - return negated; - } - -private: - Tegra::Shader::Pred index{}; - bool negated{}; -}; - -/// Attribute buffer memory (known as attributes or varyings in GLSL terms) -class AbufNode final { -public: - // Initialize for standard attributes (index is explicit). - explicit AbufNode(Tegra::Shader::Attribute::Index index_, u32 element_, Node buffer_ = {}) - : buffer{std::move(buffer_)}, index{index_}, element{element_} {} - - // Initialize for physical attributes (index is a variable value). 
- explicit AbufNode(Node physical_address_, Node buffer_ = {}) - : physical_address{std::move(physical_address_)}, buffer{std::move(buffer_)} {} - - [[nodiscard]] Tegra::Shader::Attribute::Index GetIndex() const { - return index; - } - - [[nodiscard]] u32 GetElement() const { - return element; - } - - [[nodiscard]] const Node& GetBuffer() const { - return buffer; - } - - [[nodiscard]] bool IsPhysicalBuffer() const { - return static_cast<bool>(physical_address); - } - - [[nodiscard]] const Node& GetPhysicalAddress() const { - return physical_address; - } - -private: - Node physical_address; - Node buffer; - Tegra::Shader::Attribute::Index index{}; - u32 element{}; -}; - -/// Patch memory (used to communicate tessellation stages). -class PatchNode final { -public: - explicit constexpr PatchNode(u32 offset_) : offset{offset_} {} - - [[nodiscard]] constexpr u32 GetOffset() const { - return offset; - } - -private: - u32 offset{}; -}; - -/// Constant buffer node, usually mapped to uniform buffers in GLSL -class CbufNode final { -public: - explicit CbufNode(u32 index_, Node offset_) : index{index_}, offset{std::move(offset_)} {} - - [[nodiscard]] u32 GetIndex() const { - return index; - } - - [[nodiscard]] const Node& GetOffset() const { - return offset; - } - -private: - u32 index{}; - Node offset; -}; - -/// Local memory node -class LmemNode final { -public: - explicit LmemNode(Node address_) : address{std::move(address_)} {} - - [[nodiscard]] const Node& GetAddress() const { - return address; - } - -private: - Node address; -}; - -/// Shared memory node -class SmemNode final { -public: - explicit SmemNode(Node address_) : address{std::move(address_)} {} - - [[nodiscard]] const Node& GetAddress() const { - return address; - } - -private: - Node address; -}; - -/// Global memory node -class GmemNode final { -public: - explicit GmemNode(Node real_address_, Node base_address_, const GlobalMemoryBase& descriptor_) - : real_address{std::move(real_address_)}, base_address{std::move(base_address_)}, - descriptor{descriptor_} {} - - [[nodiscard]] const Node& GetRealAddress() const { - return real_address; - } - - [[nodiscard]] const Node& GetBaseAddress() const { - return base_address; - } - - [[nodiscard]] const GlobalMemoryBase& GetDescriptor() const { - return descriptor; - } - -private: - Node real_address; - Node base_address; - GlobalMemoryBase descriptor; -}; - -/// Commentary, can be dropped -class CommentNode final { -public: - explicit CommentNode(std::string text_) : text{std::move(text_)} {} - - [[nodiscard]] const std::string& GetText() const { - return text; - } - -private: - std::string text; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp deleted file mode 100644 index 6a5b6940d..000000000 --- a/src/video_core/shader/node_helper.cpp +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include <cstring> -#include <vector> - -#include "common/common_types.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -Node Conditional(Node condition, std::vector<Node> code) { - return MakeNode<ConditionalNode>(std::move(condition), std::move(code)); -} - -Node Comment(std::string text) { - return MakeNode<CommentNode>(std::move(text)); -} - -Node Immediate(u32 value) { - return MakeNode<ImmediateNode>(value); -} - -Node Immediate(s32 value) { - return Immediate(static_cast<u32>(value)); -} - -Node Immediate(f32 value) { - u32 integral; - std::memcpy(&integral, &value, sizeof(u32)); - return Immediate(integral); -} - -OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed) { - if (is_signed) { - return operation_code; - } - switch (operation_code) { - case OperationCode::FCastInteger: - return OperationCode::FCastUInteger; - case OperationCode::IAdd: - return OperationCode::UAdd; - case OperationCode::IMul: - return OperationCode::UMul; - case OperationCode::IDiv: - return OperationCode::UDiv; - case OperationCode::IMin: - return OperationCode::UMin; - case OperationCode::IMax: - return OperationCode::UMax; - case OperationCode::ICastFloat: - return OperationCode::UCastFloat; - case OperationCode::ICastUnsigned: - return OperationCode::UCastSigned; - case OperationCode::ILogicalShiftLeft: - return OperationCode::ULogicalShiftLeft; - case OperationCode::ILogicalShiftRight: - return OperationCode::ULogicalShiftRight; - case OperationCode::IArithmeticShiftRight: - return OperationCode::UArithmeticShiftRight; - case OperationCode::IBitwiseAnd: - return OperationCode::UBitwiseAnd; - case OperationCode::IBitwiseOr: - return OperationCode::UBitwiseOr; - case OperationCode::IBitwiseXor: - return OperationCode::UBitwiseXor; - case OperationCode::IBitwiseNot: - return OperationCode::UBitwiseNot; - case OperationCode::IBitfieldExtract: - return OperationCode::UBitfieldExtract; - case OperationCode::IBitfieldInsert: - return OperationCode::UBitfieldInsert; - case OperationCode::IBitCount: - return OperationCode::UBitCount; - case OperationCode::LogicalILessThan: - return OperationCode::LogicalULessThan; - case OperationCode::LogicalIEqual: - return OperationCode::LogicalUEqual; - case OperationCode::LogicalILessEqual: - return OperationCode::LogicalULessEqual; - case OperationCode::LogicalIGreaterThan: - return OperationCode::LogicalUGreaterThan; - case OperationCode::LogicalINotEqual: - return OperationCode::LogicalUNotEqual; - case OperationCode::LogicalIGreaterEqual: - return OperationCode::LogicalUGreaterEqual; - case OperationCode::AtomicIExchange: - return OperationCode::AtomicUExchange; - case OperationCode::AtomicIAdd: - return OperationCode::AtomicUAdd; - case OperationCode::AtomicIMin: - return OperationCode::AtomicUMin; - case OperationCode::AtomicIMax: - return OperationCode::AtomicUMax; - case OperationCode::AtomicIAnd: - return OperationCode::AtomicUAnd; - case OperationCode::AtomicIOr: - return OperationCode::AtomicUOr; - case OperationCode::AtomicIXor: - return OperationCode::AtomicUXor; - case OperationCode::INegate: - UNREACHABLE_MSG("Can't negate an unsigned integer"); - return {}; - case OperationCode::IAbsolute: - UNREACHABLE_MSG("Can't apply absolute to an unsigned integer"); - return {}; - default: - UNREACHABLE_MSG("Unknown signed operation with code={}", operation_code); - return {}; - } -} - -} // namespace VideoCommon::Shader diff --git 
a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h
deleted file mode 100644
index 1e0886185..000000000
--- a/src/video_core/shader/node_helper.h
+++ /dev/null
@@ -1,71 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <memory>
-#include <string>
-#include <tuple>
-#include <type_traits>
-#include <utility>
-#include <vector>
-
-#include "common/common_types.h"
-#include "video_core/shader/node.h"
-
-namespace VideoCommon::Shader {
-
-/// This arithmetic operation cannot be constraint
-inline constexpr MetaArithmetic PRECISE = {true};
-/// This arithmetic operation can be optimized away
-inline constexpr MetaArithmetic NO_PRECISE = {false};
-
-/// Creates a conditional node
-Node Conditional(Node condition, std::vector<Node> code);
-
-/// Creates a commentary node
-Node Comment(std::string text);
-
-/// Creates an u32 immediate
-Node Immediate(u32 value);
-
-/// Creates a s32 immediate
-Node Immediate(s32 value);
-
-/// Creates a f32 immediate
-Node Immediate(f32 value);
-
-/// Converts an signed operation code to an unsigned operation code
-OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed);
-
-template <typename T, typename... Args>
-Node MakeNode(Args&&... args) {
-    static_assert(std::is_convertible_v<T, NodeData>);
-    return std::make_shared<NodeData>(T(std::forward<Args>(args)...));
-}
-
-template <typename T, typename... Args>
-TrackSampler MakeTrackSampler(Args&&... args) {
-    static_assert(std::is_convertible_v<T, TrackSamplerData>);
-    return std::make_shared<TrackSamplerData>(T{std::forward<Args>(args)...});
-}
-
-template <typename... Args>
-Node Operation(OperationCode code, Args&&... args) {
-    if constexpr (sizeof...(args) == 0) {
-        return MakeNode<OperationNode>(code);
-    } else if constexpr (std::is_convertible_v<std::tuple_element_t<0, std::tuple<Args...>>,
-                                               Meta>) {
-        return MakeNode<OperationNode>(code, std::forward<Args>(args)...);
-    } else {
-        return MakeNode<OperationNode>(code, Meta{}, std::forward<Args>(args)...);
-    }
-}
-
-template <typename... Args>
-Node SignedOperation(OperationCode code, bool is_signed, Args&&... args) {
-    return Operation(SignedToUnsignedCode(code, is_signed), std::forward<Args>(args)...);
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp
deleted file mode 100644
index 148d91fcb..000000000
--- a/src/video_core/shader/registry.cpp
+++ /dev/null
@@ -1,181 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
- -#include <algorithm> -#include <tuple> - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/kepler_compute.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/shader/registry.h" - -namespace VideoCommon::Shader { - -using Tegra::Engines::ConstBufferEngineInterface; -using Tegra::Engines::SamplerDescriptor; -using Tegra::Engines::ShaderType; - -namespace { - -GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { - if (shader_stage == ShaderType::Compute) { - return {}; - } - - auto& graphics = dynamic_cast<Tegra::Engines::Maxwell3D&>(engine); - - return { - .tfb_layouts = graphics.regs.tfb_layouts, - .tfb_varying_locs = graphics.regs.tfb_varying_locs, - .primitive_topology = graphics.regs.draw.topology, - .tessellation_primitive = graphics.regs.tess_mode.prim, - .tessellation_spacing = graphics.regs.tess_mode.spacing, - .tfb_enabled = graphics.regs.tfb_enabled != 0, - .tessellation_clockwise = graphics.regs.tess_mode.cw.Value() != 0, - }; -} - -ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { - if (shader_stage != ShaderType::Compute) { - return {}; - } - - auto& compute = dynamic_cast<Tegra::Engines::KeplerCompute&>(engine); - const auto& launch = compute.launch_description; - - return { - .workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z}, - .shared_memory_size_in_words = launch.shared_alloc, - .local_memory_size_in_words = launch.local_pos_alloc, - }; -} - -} // Anonymous namespace - -Registry::Registry(ShaderType shader_stage, const SerializedRegistryInfo& info) - : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile}, - bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {} - -Registry::Registry(ShaderType shader_stage, ConstBufferEngineInterface& engine_) - : stage{shader_stage}, engine{&engine_}, bound_buffer{engine_.GetBoundBuffer()}, - graphics_info{MakeGraphicsInfo(shader_stage, engine_)}, compute_info{MakeComputeInfo( - shader_stage, engine_)} {} - -Registry::~Registry() = default; - -std::optional<u32> Registry::ObtainKey(u32 buffer, u32 offset) { - const std::pair<u32, u32> key = {buffer, offset}; - const auto iter = keys.find(key); - if (iter != keys.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - const u32 value = engine->AccessConstBuffer32(stage, buffer, offset); - keys.emplace(key, value); - return value; -} - -std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) { - const u32 key = offset; - const auto iter = bound_samplers.find(key); - if (iter != bound_samplers.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset); - bound_samplers.emplace(key, value); - return value; -} - -std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler( - std::pair<u32, u32> buffers, std::pair<u32, u32> offsets) { - SeparateSamplerKey key; - key.buffers = buffers; - key.offsets = offsets; - const auto iter = separate_samplers.find(key); - if (iter != separate_samplers.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - - const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first); - const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second); - 
const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2); - separate_samplers.emplace(key, value); - return value; -} - -std::optional<SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, u32 offset) { - const std::pair key = {buffer, offset}; - const auto iter = bindless_samplers.find(key); - if (iter != bindless_samplers.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset); - bindless_samplers.emplace(key, value); - return value; -} - -void Registry::InsertKey(u32 buffer, u32 offset, u32 value) { - keys.insert_or_assign({buffer, offset}, value); -} - -void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) { - bound_samplers.insert_or_assign(offset, sampler); -} - -void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) { - bindless_samplers.insert_or_assign({buffer, offset}, sampler); -} - -bool Registry::IsConsistent() const { - if (!engine) { - return true; - } - return std::all_of(keys.begin(), keys.end(), - [this](const auto& pair) { - const auto [cbuf, offset] = pair.first; - const auto value = pair.second; - return value == engine->AccessConstBuffer32(stage, cbuf, offset); - }) && - std::all_of(bound_samplers.begin(), bound_samplers.end(), - [this](const auto& sampler) { - const auto [key, value] = sampler; - return value == engine->AccessBoundSampler(stage, key); - }) && - std::all_of(bindless_samplers.begin(), bindless_samplers.end(), - [this](const auto& sampler) { - const auto [cbuf, offset] = sampler.first; - const auto value = sampler.second; - return value == engine->AccessBindlessSampler(stage, cbuf, offset); - }); -} - -bool Registry::HasEqualKeys(const Registry& rhs) const { - return std::tie(keys, bound_samplers, bindless_samplers) == - std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers); -} - -const GraphicsInfo& Registry::GetGraphicsInfo() const { - ASSERT(stage != Tegra::Engines::ShaderType::Compute); - return graphics_info; -} - -const ComputeInfo& Registry::GetComputeInfo() const { - ASSERT(stage == Tegra::Engines::ShaderType::Compute); - return compute_info; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h deleted file mode 100644 index 4bebefdde..000000000 --- a/src/video_core/shader/registry.h +++ /dev/null @@ -1,172 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
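Registry::ObtainKey and the Obtain*Sampler helpers deleted above all follow the same memoization shape: consult a map first and only fall back to the engine when the value has not been recorded, so a registry restored from disk (with no engine attached) can still replay identical answers. A stand-alone sketch of that shape, with a plain callback standing in for ConstBufferEngineInterface::AccessConstBuffer32:

    #include <cstdint>
    #include <functional>
    #include <map>
    #include <optional>
    #include <utility>

    class KeyCache {
    public:
        using Fetch = std::function<std::uint32_t(std::uint32_t buffer, std::uint32_t offset)>;

        // `fetch` may be empty when replaying a serialized cache (no engine attached).
        explicit KeyCache(Fetch fetch_ = {}) : fetch{std::move(fetch_)} {}

        std::optional<std::uint32_t> ObtainKey(std::uint32_t buffer, std::uint32_t offset) {
            const std::pair key{buffer, offset};
            if (const auto it = keys.find(key); it != keys.end()) {
                return it->second;              // already recorded
            }
            if (!fetch) {
                return std::nullopt;            // no engine to ask
            }
            const std::uint32_t value = fetch(buffer, offset);
            keys.emplace(key, value);           // record for later serialization/consistency checks
            return value;
        }

    private:
        Fetch fetch;
        std::map<std::pair<std::uint32_t, std::uint32_t>, std::uint32_t> keys;
    };

The real Registry keeps these maps in unordered_maps with a pair hash; a std::map is used here only to keep the sketch free of custom hashers.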
- -#pragma once - -#include <array> -#include <optional> -#include <type_traits> -#include <unordered_map> -#include <utility> - -#include "common/common_types.h" -#include "common/hash.h" -#include "video_core/engines/const_buffer_engine_interface.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/guest_driver.h" - -namespace VideoCommon::Shader { - -struct SeparateSamplerKey { - std::pair<u32, u32> buffers; - std::pair<u32, u32> offsets; -}; - -} // namespace VideoCommon::Shader - -namespace std { - -template <> -struct hash<VideoCommon::Shader::SeparateSamplerKey> { - std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept { - return std::hash<u32>{}(key.buffers.first ^ key.buffers.second ^ key.offsets.first ^ - key.offsets.second); - } -}; - -template <> -struct equal_to<VideoCommon::Shader::SeparateSamplerKey> { - bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs, - const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept { - return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets; - } -}; - -} // namespace std - -namespace VideoCommon::Shader { - -using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>; -using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>; -using SeparateSamplerMap = - std::unordered_map<SeparateSamplerKey, Tegra::Engines::SamplerDescriptor>; -using BindlessSamplerMap = - std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; - -struct GraphicsInfo { - using Maxwell = Tegra::Engines::Maxwell3D::Regs; - - std::array<Maxwell::TransformFeedbackLayout, Maxwell::NumTransformFeedbackBuffers> - tfb_layouts{}; - std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{}; - Maxwell::PrimitiveTopology primitive_topology{}; - Maxwell::TessellationPrimitive tessellation_primitive{}; - Maxwell::TessellationSpacing tessellation_spacing{}; - bool tfb_enabled = false; - bool tessellation_clockwise = false; -}; -static_assert(std::is_trivially_copyable_v<GraphicsInfo> && - std::is_standard_layout_v<GraphicsInfo>); - -struct ComputeInfo { - std::array<u32, 3> workgroup_size{}; - u32 shared_memory_size_in_words = 0; - u32 local_memory_size_in_words = 0; -}; -static_assert(std::is_trivially_copyable_v<ComputeInfo> && std::is_standard_layout_v<ComputeInfo>); - -struct SerializedRegistryInfo { - VideoCore::GuestDriverProfile guest_driver_profile; - u32 bound_buffer = 0; - GraphicsInfo graphics; - ComputeInfo compute; -}; - -/** - * The Registry is a class use to interface the 3D and compute engines with the shader compiler. - * With it, the shader can obtain required data from GPU state and store it for disk shader - * compilation. - */ -class Registry { -public: - explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info); - - explicit Registry(Tegra::Engines::ShaderType shader_stage, - Tegra::Engines::ConstBufferEngineInterface& engine_); - - ~Registry(); - - /// Retrieves a key from the registry, if it's registered, it will give the registered value, if - /// not it will obtain it from maxwell3d and register it. 
- std::optional<u32> ObtainKey(u32 buffer, u32 offset); - - std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset); - - std::optional<Tegra::Engines::SamplerDescriptor> ObtainSeparateSampler( - std::pair<u32, u32> buffers, std::pair<u32, u32> offsets); - - std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); - - /// Inserts a key. - void InsertKey(u32 buffer, u32 offset, u32 value); - - /// Inserts a bound sampler key. - void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); - - /// Inserts a bindless sampler key. - void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); - - /// Checks keys and samplers against engine's current const buffers. - /// Returns true if they are the same value, false otherwise. - bool IsConsistent() const; - - /// Returns true if the keys are equal to the other ones in the registry. - bool HasEqualKeys(const Registry& rhs) const; - - /// Returns graphics information from this shader - const GraphicsInfo& GetGraphicsInfo() const; - - /// Returns compute information from this shader - const ComputeInfo& GetComputeInfo() const; - - /// Gives an getter to the const buffer keys in the database. - const KeyMap& GetKeys() const { - return keys; - } - - /// Gets samplers database. - const BoundSamplerMap& GetBoundSamplers() const { - return bound_samplers; - } - - /// Gets bindless samplers database. - const BindlessSamplerMap& GetBindlessSamplers() const { - return bindless_samplers; - } - - /// Gets bound buffer used on this shader - u32 GetBoundBuffer() const { - return bound_buffer; - } - - /// Obtains access to the guest driver's profile. - VideoCore::GuestDriverProfile& AccessGuestDriverProfile() { - return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile; - } - -private: - const Tegra::Engines::ShaderType stage; - VideoCore::GuestDriverProfile stored_guest_driver_profile; - Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; - KeyMap keys; - BoundSamplerMap bound_samplers; - SeparateSamplerMap separate_samplers; - BindlessSamplerMap bindless_samplers; - u32 bound_buffer; - GraphicsInfo graphics_info; - ComputeInfo compute_info; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp deleted file mode 100644 index a4987ffc6..000000000 --- a/src/video_core/shader/shader_ir.cpp +++ /dev/null @@ -1,464 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include <algorithm> -#include <array> -#include <cmath> - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Attribute; -using Tegra::Shader::Instruction; -using Tegra::Shader::IpaMode; -using Tegra::Shader::Pred; -using Tegra::Shader::PredCondition; -using Tegra::Shader::PredOperation; -using Tegra::Shader::Register; - -ShaderIR::ShaderIR(const ProgramCode& program_code_, u32 main_offset_, CompilerSettings settings_, - Registry& registry_) - : program_code{program_code_}, main_offset{main_offset_}, settings{settings_}, registry{ - registry_} { - Decode(); - PostDecode(); -} - -ShaderIR::~ShaderIR() = default; - -Node ShaderIR::GetRegister(Register reg) { - if (reg != Register::ZeroIndex) { - used_registers.insert(static_cast<u32>(reg)); - } - return MakeNode<GprNode>(reg); -} - -Node ShaderIR::GetCustomVariable(u32 id) { - return MakeNode<CustomVarNode>(id); -} - -Node ShaderIR::GetImmediate19(Instruction instr) { - return Immediate(instr.alu.GetImm20_19()); -} - -Node ShaderIR::GetImmediate32(Instruction instr) { - return Immediate(instr.alu.GetImm20_32()); -} - -Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) { - const auto index = static_cast<u32>(index_); - const auto offset = static_cast<u32>(offset_); - - used_cbufs.try_emplace(index).first->second.MarkAsUsed(offset); - - return MakeNode<CbufNode>(index, Immediate(offset)); -} - -Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) { - const auto index = static_cast<u32>(index_); - const auto offset = static_cast<u32>(offset_); - - used_cbufs.try_emplace(index).first->second.MarkAsUsedIndirect(); - - Node final_offset = [&] { - // Attempt to inline constant buffer without a variable offset. This is done to allow - // tracking LDC calls. - if (const auto gpr = std::get_if<GprNode>(&*node)) { - if (gpr->GetIndex() == Register::ZeroIndex) { - return Immediate(offset); - } - } - return Operation(OperationCode::UAdd, NO_PRECISE, std::move(node), Immediate(offset)); - }(); - return MakeNode<CbufNode>(index, std::move(final_offset)); -} - -Node ShaderIR::GetPredicate(u64 pred_, bool negated) { - const auto pred = static_cast<Pred>(pred_); - if (pred != Pred::UnusedIndex && pred != Pred::NeverExecute) { - used_predicates.insert(pred); - } - - return MakeNode<PredicateNode>(pred, negated); -} - -Node ShaderIR::GetPredicate(bool immediate) { - return GetPredicate(static_cast<u64>(immediate ? 
Pred::UnusedIndex : Pred::NeverExecute)); -} - -Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { - MarkAttributeUsage(index, element); - used_input_attributes.emplace(index); - return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer)); -} - -Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) { - uses_physical_attributes = true; - return MakeNode<AbufNode>(GetRegister(physical_address), buffer); -} - -Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { - MarkAttributeUsage(index, element); - used_output_attributes.insert(index); - return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer)); -} - -Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const { - Node node = MakeNode<InternalFlagNode>(flag); - if (negated) { - return Operation(OperationCode::LogicalNegate, std::move(node)); - } - return node; -} - -Node ShaderIR::GetLocalMemory(Node address) { - return MakeNode<LmemNode>(std::move(address)); -} - -Node ShaderIR::GetSharedMemory(Node address) { - return MakeNode<SmemNode>(std::move(address)); -} - -Node ShaderIR::GetTemporary(u32 id) { - return GetRegister(Register::ZeroIndex + 1 + id); -} - -Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) { - if (absolute) { - value = Operation(OperationCode::FAbsolute, NO_PRECISE, std::move(value)); - } - if (negate) { - value = Operation(OperationCode::FNegate, NO_PRECISE, std::move(value)); - } - return value; -} - -Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) { - if (!saturate) { - return value; - } - - Node positive_zero = Immediate(std::copysignf(0, 1)); - Node positive_one = Immediate(1.0f); - return Operation(OperationCode::FClamp, NO_PRECISE, std::move(value), std::move(positive_zero), - std::move(positive_one)); -} - -Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signed) { - switch (size) { - case Register::Size::Byte: - value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, - std::move(value), Immediate(24)); - value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, - std::move(value), Immediate(24)); - return value; - case Register::Size::Short: - value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, - std::move(value), Immediate(16)); - value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, - std::move(value), Immediate(16)); - return value; - case Register::Size::Word: - // Default - do nothing - return value; - default: - UNREACHABLE_MSG("Unimplemented conversion size: {}", size); - return value; - } -} - -Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed) { - if (!is_signed) { - // Absolute or negate on an unsigned is pointless - return value; - } - if (absolute) { - value = Operation(OperationCode::IAbsolute, NO_PRECISE, std::move(value)); - } - if (negate) { - value = Operation(OperationCode::INegate, NO_PRECISE, std::move(value)); - } - return value; -} - -Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) { - Node value = Immediate(instr.half_imm.PackImmediates()); - if (!has_negation) { - return value; - } - - Node first_negate = GetPredicate(instr.half_imm.first_negate != 0); - Node second_negate = GetPredicate(instr.half_imm.second_negate != 0); - - return Operation(OperationCode::HNegate, NO_PRECISE, 
std::move(value), std::move(first_negate), - std::move(second_negate)); -} - -Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) { - return Operation(OperationCode::HUnpack, type, std::move(value)); -} - -Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { - switch (merge) { - case Tegra::Shader::HalfMerge::H0_H1: - return src; - case Tegra::Shader::HalfMerge::F32: - return Operation(OperationCode::HMergeF32, std::move(src)); - case Tegra::Shader::HalfMerge::Mrg_H0: - return Operation(OperationCode::HMergeH0, std::move(dest), std::move(src)); - case Tegra::Shader::HalfMerge::Mrg_H1: - return Operation(OperationCode::HMergeH1, std::move(dest), std::move(src)); - } - UNREACHABLE(); - return src; -} - -Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { - if (absolute) { - value = Operation(OperationCode::HAbsolute, NO_PRECISE, std::move(value)); - } - if (negate) { - value = Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), GetPredicate(true), - GetPredicate(true)); - } - return value; -} - -Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) { - if (!saturate) { - return value; - } - - Node positive_zero = Immediate(std::copysignf(0, 1)); - Node positive_one = Immediate(1.0f); - return Operation(OperationCode::HClamp, NO_PRECISE, std::move(value), std::move(positive_zero), - std::move(positive_one)); -} - -Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { - if (condition == PredCondition::T) { - return GetPredicate(true); - } else if (condition == PredCondition::F) { - return GetPredicate(false); - } - - static constexpr std::array comparison_table{ - OperationCode(0), - OperationCode::LogicalFOrdLessThan, // LT - OperationCode::LogicalFOrdEqual, // EQ - OperationCode::LogicalFOrdLessEqual, // LE - OperationCode::LogicalFOrdGreaterThan, // GT - OperationCode::LogicalFOrdNotEqual, // NE - OperationCode::LogicalFOrdGreaterEqual, // GE - OperationCode::LogicalFOrdered, // NUM - OperationCode::LogicalFUnordered, // NAN - OperationCode::LogicalFUnordLessThan, // LTU - OperationCode::LogicalFUnordEqual, // EQU - OperationCode::LogicalFUnordLessEqual, // LEU - OperationCode::LogicalFUnordGreaterThan, // GTU - OperationCode::LogicalFUnordNotEqual, // NEU - OperationCode::LogicalFUnordGreaterEqual, // GEU - }; - const std::size_t index = static_cast<std::size_t>(condition); - ASSERT_MSG(index < std::size(comparison_table), "Invalid condition={}", index); - - return Operation(comparison_table[index], op_a, op_b); -} - -Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a, - Node op_b) { - static constexpr std::array comparison_table{ - std::pair{PredCondition::LT, OperationCode::LogicalILessThan}, - std::pair{PredCondition::EQ, OperationCode::LogicalIEqual}, - std::pair{PredCondition::LE, OperationCode::LogicalILessEqual}, - std::pair{PredCondition::GT, OperationCode::LogicalIGreaterThan}, - std::pair{PredCondition::NE, OperationCode::LogicalINotEqual}, - std::pair{PredCondition::GE, OperationCode::LogicalIGreaterEqual}, - }; - - const auto comparison = - std::find_if(comparison_table.cbegin(), comparison_table.cend(), - [condition](const auto entry) { return condition == entry.first; }); - UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), - "Unknown predicate comparison operation"); - - return SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a), - std::move(op_b)); -} - -Node 
ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, - Node op_b) { - static constexpr std::array comparison_table{ - std::pair{PredCondition::LT, OperationCode::Logical2HLessThan}, - std::pair{PredCondition::EQ, OperationCode::Logical2HEqual}, - std::pair{PredCondition::LE, OperationCode::Logical2HLessEqual}, - std::pair{PredCondition::GT, OperationCode::Logical2HGreaterThan}, - std::pair{PredCondition::NE, OperationCode::Logical2HNotEqual}, - std::pair{PredCondition::GE, OperationCode::Logical2HGreaterEqual}, - std::pair{PredCondition::LTU, OperationCode::Logical2HLessThanWithNan}, - std::pair{PredCondition::LEU, OperationCode::Logical2HLessEqualWithNan}, - std::pair{PredCondition::GTU, OperationCode::Logical2HGreaterThanWithNan}, - std::pair{PredCondition::NEU, OperationCode::Logical2HNotEqualWithNan}, - std::pair{PredCondition::GEU, OperationCode::Logical2HGreaterEqualWithNan}, - }; - - const auto comparison = - std::find_if(comparison_table.cbegin(), comparison_table.cend(), - [condition](const auto entry) { return condition == entry.first; }); - UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), - "Unknown predicate comparison operation"); - - return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b)); -} - -OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { - static constexpr std::array operation_table{ - OperationCode::LogicalAnd, - OperationCode::LogicalOr, - OperationCode::LogicalXor, - }; - - const auto index = static_cast<std::size_t>(operation); - if (index >= operation_table.size()) { - UNIMPLEMENTED_MSG("Unknown predicate operation."); - return {}; - } - - return operation_table[index]; -} - -Node ShaderIR::GetConditionCode(ConditionCode cc) const { - switch (cc) { - case ConditionCode::NEU: - return GetInternalFlag(InternalFlag::Zero, true); - case ConditionCode::FCSM_TR: - UNIMPLEMENTED_MSG("EXIT.FCSM_TR is not implemented"); - return MakeNode<PredicateNode>(Pred::NeverExecute, false); - default: - UNIMPLEMENTED_MSG("Unimplemented condition code: {}", cc); - return MakeNode<PredicateNode>(Pred::NeverExecute, false); - } -} - -void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) { - bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), std::move(src))); -} - -void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) { - bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), std::move(src))); -} - -void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) { - bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), std::move(value))); -} - -void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) { - bb.push_back( - Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value))); -} - -void ShaderIR::SetSharedMemory(NodeBlock& bb, Node address, Node value) { - bb.push_back( - Operation(OperationCode::Assign, GetSharedMemory(std::move(address)), std::move(value))); -} - -void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) { - SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value)); -} - -void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) { - if (!sets_cc) { - return; - } - Node zerop = Operation(OperationCode::LogicalFOrdEqual, std::move(value), Immediate(0.0f)); - SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); - LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); -} - -void 
ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc) { - if (!sets_cc) { - return; - } - Node zerop = Operation(OperationCode::LogicalIEqual, std::move(value), Immediate(0)); - SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); - LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); -} - -Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { - return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, std::move(value), - Immediate(offset), Immediate(bits)); -} - -Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) { - return Operation(OperationCode::UBitfieldInsert, NO_PRECISE, base, insert, Immediate(offset), - Immediate(bits)); -} - -void ShaderIR::MarkAttributeUsage(Attribute::Index index, u64 element) { - switch (index) { - case Attribute::Index::LayerViewportPointSize: - switch (element) { - case 0: - UNIMPLEMENTED(); - break; - case 1: - uses_layer = true; - break; - case 2: - uses_viewport_index = true; - break; - case 3: - uses_point_size = true; - break; - } - break; - case Attribute::Index::TessCoordInstanceIDVertexID: - switch (element) { - case 2: - uses_instance_id = true; - break; - case 3: - uses_vertex_id = true; - break; - } - break; - case Attribute::Index::ClipDistances0123: - case Attribute::Index::ClipDistances4567: { - const u64 clip_index = (index == Attribute::Index::ClipDistances4567 ? 4 : 0) + element; - used_clip_distances.at(clip_index) = true; - break; - } - case Attribute::Index::FrontColor: - case Attribute::Index::FrontSecondaryColor: - case Attribute::Index::BackColor: - case Attribute::Index::BackSecondaryColor: - uses_legacy_varyings = true; - break; - default: - if (index >= Attribute::Index::TexCoord_0 && index <= Attribute::Index::TexCoord_7) { - uses_legacy_varyings = true; - } - break; - } -} - -std::size_t ShaderIR::DeclareAmend(Node new_amend) { - const auto id = amend_code.size(); - amend_code.push_back(std::move(new_amend)); - return id; -} - -u32 ShaderIR::NewCustomVariable() { - return num_custom_variables++; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h deleted file mode 100644 index 1cd7c14d7..000000000 --- a/src/video_core/shader/shader_ir.h +++ /dev/null @@ -1,479 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
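ConvertIntegerSize in the shader_ir.cpp hunk above narrows a value to 8 or 16 bits by shifting it up to the top of the word and shifting it back down, letting the right shift (arithmetic when signed, logical when unsigned) perform the sign- or zero-extension. The same trick in plain C++, shown only as an illustration of the emitted operations (the signed case assumes two's-complement, arithmetic right-shift behaviour):

    #include <cstdint>

    // Sign-extend the low `bits` bits of `value` to a full 32-bit signed integer (1 <= bits <= 32).
    std::int32_t SignExtend(std::uint32_t value, unsigned bits) {
        const unsigned shift = 32 - bits;
        return static_cast<std::int32_t>(value << shift) >> shift;
    }

    // Zero-extend: the logical right shift of an unsigned value discards the upper bits.
    std::uint32_t ZeroExtend(std::uint32_t value, unsigned bits) {
        const unsigned shift = 32 - bits;
        return (value << shift) >> shift;
    }

    // Example: SignExtend(0xFF, 8) == -1, ZeroExtend(0xFF, 8) == 255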
- -#pragma once - -#include <array> -#include <list> -#include <map> -#include <optional> -#include <set> -#include <tuple> -#include <vector> - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/engines/shader_header.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/compiler_settings.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/node.h" -#include "video_core/shader/registry.h" - -namespace VideoCommon::Shader { - -struct ShaderBlock; - -constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; - -struct ConstBuffer { - constexpr explicit ConstBuffer(u32 max_offset_, bool is_indirect_) - : max_offset{max_offset_}, is_indirect{is_indirect_} {} - - constexpr ConstBuffer() = default; - - void MarkAsUsed(u64 offset) { - max_offset = std::max(max_offset, static_cast<u32>(offset)); - } - - void MarkAsUsedIndirect() { - is_indirect = true; - } - - bool IsIndirect() const { - return is_indirect; - } - - u32 GetSize() const { - return max_offset + static_cast<u32>(sizeof(float)); - } - - u32 GetMaxOffset() const { - return max_offset; - } - -private: - u32 max_offset = 0; - bool is_indirect = false; -}; - -struct GlobalMemoryUsage { - bool is_read{}; - bool is_written{}; -}; - -class ShaderIR final { -public: - explicit ShaderIR(const ProgramCode& program_code_, u32 main_offset_, - CompilerSettings settings_, Registry& registry_); - ~ShaderIR(); - - const std::map<u32, NodeBlock>& GetBasicBlocks() const { - return basic_blocks; - } - - const std::set<u32>& GetRegisters() const { - return used_registers; - } - - const std::set<Tegra::Shader::Pred>& GetPredicates() const { - return used_predicates; - } - - const std::set<Tegra::Shader::Attribute::Index>& GetInputAttributes() const { - return used_input_attributes; - } - - const std::set<Tegra::Shader::Attribute::Index>& GetOutputAttributes() const { - return used_output_attributes; - } - - const std::map<u32, ConstBuffer>& GetConstantBuffers() const { - return used_cbufs; - } - - const std::list<SamplerEntry>& GetSamplers() const { - return used_samplers; - } - - const std::list<ImageEntry>& GetImages() const { - return used_images; - } - - const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances() - const { - return used_clip_distances; - } - - const std::map<GlobalMemoryBase, GlobalMemoryUsage>& GetGlobalMemory() const { - return used_global_memory; - } - - std::size_t GetLength() const { - return static_cast<std::size_t>(coverage_end * sizeof(u64)); - } - - bool UsesLayer() const { - return uses_layer; - } - - bool UsesViewportIndex() const { - return uses_viewport_index; - } - - bool UsesPointSize() const { - return uses_point_size; - } - - bool UsesInstanceId() const { - return uses_instance_id; - } - - bool UsesVertexId() const { - return uses_vertex_id; - } - - bool UsesLegacyVaryings() const { - return uses_legacy_varyings; - } - - bool UsesYNegate() const { - return uses_y_negate; - } - - bool UsesWarps() const { - return uses_warps; - } - - bool HasPhysicalAttributes() const { - return uses_physical_attributes; - } - - const Tegra::Shader::Header& GetHeader() const { - return header; - } - - bool IsFlowStackDisabled() const { - return disable_flow_stack; - } - - bool IsDecompiled() const { - return decompiled; - } - - const ASTManager& GetASTManager() const { - return program_manager; - } - - ASTNode GetASTProgram() const { - return program_manager.GetProgram(); - } - - u32 
GetASTNumVariables() const { - return program_manager.GetVariables(); - } - - u32 ConvertAddressToNvidiaSpace(u32 address) const { - return (address - main_offset) * static_cast<u32>(sizeof(Tegra::Shader::Instruction)); - } - - /// Returns a condition code evaluated from internal flags - Node GetConditionCode(Tegra::Shader::ConditionCode cc) const; - - const Node& GetAmendNode(std::size_t index) const { - return amend_code[index]; - } - - u32 GetNumCustomVariables() const { - return num_custom_variables; - } - -private: - friend class ASTDecoder; - - struct SamplerInfo { - std::optional<Tegra::Shader::TextureType> type; - std::optional<bool> is_array; - std::optional<bool> is_shadow; - std::optional<bool> is_buffer; - - constexpr bool IsComplete() const noexcept { - return type && is_array && is_shadow && is_buffer; - } - }; - - void Decode(); - void PostDecode(); - - NodeBlock DecodeRange(u32 begin, u32 end); - void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); - void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block); - - /** - * Decodes a single instruction from Tegra to IR. - * @param bb Basic block where the nodes will be written to. - * @param pc Program counter. Offset to decode. - * @return Next address to decode. - */ - u32 DecodeInstr(NodeBlock& bb, u32 pc); - - u32 DecodeArithmetic(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticImmediate(NodeBlock& bb, u32 pc); - u32 DecodeBfe(NodeBlock& bb, u32 pc); - u32 DecodeBfi(NodeBlock& bb, u32 pc); - u32 DecodeShift(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticInteger(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticHalf(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc); - u32 DecodeFfma(NodeBlock& bb, u32 pc); - u32 DecodeHfma2(NodeBlock& bb, u32 pc); - u32 DecodeConversion(NodeBlock& bb, u32 pc); - u32 DecodeWarp(NodeBlock& bb, u32 pc); - u32 DecodeMemory(NodeBlock& bb, u32 pc); - u32 DecodeTexture(NodeBlock& bb, u32 pc); - u32 DecodeImage(NodeBlock& bb, u32 pc); - u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodePredicateSetRegister(NodeBlock& bb, u32 pc); - u32 DecodePredicateSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodeFloatSet(NodeBlock& bb, u32 pc); - u32 DecodeIntegerSet(NodeBlock& bb, u32 pc); - u32 DecodeHalfSet(NodeBlock& bb, u32 pc); - u32 DecodeVideo(NodeBlock& bb, u32 pc); - u32 DecodeXmad(NodeBlock& bb, u32 pc); - u32 DecodeOther(NodeBlock& bb, u32 pc); - - /// Generates a node for a passed register. - Node GetRegister(Tegra::Shader::Register reg); - /// Generates a node for a custom variable - Node GetCustomVariable(u32 id); - /// Generates a node representing a 19-bit immediate value - Node GetImmediate19(Tegra::Shader::Instruction instr); - /// Generates a node representing a 32-bit immediate value - Node GetImmediate32(Tegra::Shader::Instruction instr); - /// Generates a node representing a constant buffer - Node GetConstBuffer(u64 index, u64 offset); - /// Generates a node representing a constant buffer with a variadic offset - Node GetConstBufferIndirect(u64 index, u64 offset, Node node); - /// Generates a node for a passed predicate. 
It can be optionally negated - Node GetPredicate(u64 pred, bool negated = false); - /// Generates a predicate node for an immediate true or false value - Node GetPredicate(bool immediate); - /// Generates a node representing an input attribute. Keeps track of used attributes. - Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer = {}); - /// Generates a node representing a physical input attribute. - Node GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer = {}); - /// Generates a node representing an output attribute. Keeps track of used attributes. - Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer); - /// Generates a node representing an internal flag - Node GetInternalFlag(InternalFlag flag, bool negated = false) const; - /// Generates a node representing a local memory address - Node GetLocalMemory(Node address); - /// Generates a node representing a shared memory address - Node GetSharedMemory(Node address); - /// Generates a temporary, internally it uses a post-RZ register - Node GetTemporary(u32 id); - - /// Sets a register. src value must be a number-evaluated node. - void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src); - /// Sets a predicate. src value must be a bool-evaluated node - void SetPredicate(NodeBlock& bb, u64 dest, Node src); - /// Sets an internal flag. src value must be a bool-evaluated node - void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value); - /// Sets a local memory address with a value. - void SetLocalMemory(NodeBlock& bb, Node address, Node value); - /// Sets a shared memory address with a value. - void SetSharedMemory(NodeBlock& bb, Node address, Node value); - /// Sets a temporary. Internally it uses a post-RZ register - void SetTemporary(NodeBlock& bb, u32 id, Node value); - - /// Sets internal flags from a float - void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true); - /// Sets internal flags from an integer - void SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc = true); - - /// Conditionally absolute/negated float. Absolute is applied first - Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate); - /// Conditionally saturates a float - Node GetSaturatedFloat(Node value, bool saturate = true); - - /// Converts an integer to different sizes. - Node ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed); - /// Conditionally absolute/negated integer. Absolute is applied first - Node GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed); - - /// Unpacks a half immediate from an instruction - Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation); - /// Unpacks a binary value into a half float pair with a type format - Node UnpackHalfFloat(Node value, Tegra::Shader::HalfType type); - /// Merges a half pair into another value - Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge); - /// Conditionally absolute/negated half float pair. 
Absolute is applied first - Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate); - /// Conditionally saturates a half float pair - Node GetSaturatedHalfFloat(Node value, bool saturate = true); - - /// Get image component value by type and size - std::pair<Node, bool> GetComponentValue(Tegra::Texture::ComponentType component_type, - u32 component_size, Node original_value); - - /// Returns a predicate comparing two floats - Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); - /// Returns a predicate comparing two integers - Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed, - Node op_a, Node op_b); - /// Returns a predicate comparing two half floats. meta consumes how both pairs will be compared - Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); - - /// Returns a predicate combiner operation - OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); - - /// Queries the missing sampler info from the execution context. - SamplerInfo GetSamplerInfo(SamplerInfo info, - std::optional<Tegra::Engines::SamplerDescriptor> sampler); - - /// Accesses a texture sampler. - std::optional<SamplerEntry> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info); - - /// Accesses a texture sampler for a bindless texture. - std::optional<SamplerEntry> GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info, - Node& index_var); - - /// Accesses an image. - ImageEntry& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); - - /// Access a bindless image sampler. - ImageEntry& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); - - /// Extracts a sequence of bits from a node - Node BitfieldExtract(Node value, u32 offset, u32 bits); - - /// Inserts a sequence of bits from a node - Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits); - - /// Marks the usage of a input or output attribute. - void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element); - - /// Decodes VMNMX instruction and inserts its code into the passed basic block. 
- void DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr); - - void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, - const Node4& components); - - void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, - const Node4& components, bool ignore_mask = false); - void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, - const Node4& components, bool ignore_mask = false); - - Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, - bool is_array, bool is_aoffi, - std::optional<Tegra::Shader::Register> bindless_reg); - - Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, - bool is_array); - - Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - bool depth_compare, bool is_array, bool is_aoffi, bool is_ptp, - bool is_bindless); - - Node4 GetTldCode(Tegra::Shader::Instruction instr); - - Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - bool is_array); - - std::tuple<std::size_t, std::size_t> ValidateAndGetCoordinateElement( - Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array, - bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); - - std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4); - - std::vector<Node> GetPtpCoordinates(std::array<Node, 2> ptp_regs); - - Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords, - Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi, - std::optional<Tegra::Shader::Register> bindless_reg); - - Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, - u64 byte_height); - - void WriteLogicOperation(NodeBlock& bb, Tegra::Shader::Register dest, - Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b, - Tegra::Shader::PredicateResultMode predicate_mode, - Tegra::Shader::Pred predicate, bool sets_cc); - void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, - Node op_c, Node imm_lut, bool sets_cc); - - std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; - - std::pair<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code, - s64 cursor); - - std::pair<Node, TrackSampler> HandleBindlessIndirectRead(const CbufNode& cbuf, - const OperationNode& operation, - Node gpr, Node base_offset, - Node tracked, const NodeBlock& code, - s64 cursor); - - std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; - - std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, - s64 cursor) const; - - std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb, - Tegra::Shader::Instruction instr, - bool is_read, bool is_write); - - /// Register new amending code and obtain the reference id. 
- std::size_t DeclareAmend(Node new_amend); - - u32 NewCustomVariable(); - - const ProgramCode& program_code; - const u32 main_offset; - const CompilerSettings settings; - Registry& registry; - - bool decompiled{}; - bool disable_flow_stack{}; - - u32 coverage_begin{}; - u32 coverage_end{}; - - std::map<u32, NodeBlock> basic_blocks; - NodeBlock global_code; - ASTManager program_manager{true, true}; - std::vector<Node> amend_code; - u32 num_custom_variables{}; - - std::set<u32> used_registers; - std::set<Tegra::Shader::Pred> used_predicates; - std::set<Tegra::Shader::Attribute::Index> used_input_attributes; - std::set<Tegra::Shader::Attribute::Index> used_output_attributes; - std::map<u32, ConstBuffer> used_cbufs; - std::list<SamplerEntry> used_samplers; - std::list<ImageEntry> used_images; - std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; - std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; - bool uses_layer{}; - bool uses_viewport_index{}; - bool uses_point_size{}; - bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes - bool uses_instance_id{}; - bool uses_vertex_id{}; - bool uses_legacy_varyings{}; - bool uses_y_negate{}; - bool uses_warps{}; - bool uses_indexed_samplers{}; - - Tegra::Shader::Header header; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp deleted file mode 100644 index 6be3ea92b..000000000 --- a/src/video_core/shader/track.cpp +++ /dev/null @@ -1,236 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <utility> -#include <variant> - -#include "common/common_types.h" -#include "video_core/shader/node.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -namespace { - -std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, - OperationCode operation_code) { - for (; cursor >= 0; --cursor) { - Node node = code.at(cursor); - - if (const auto operation = std::get_if<OperationNode>(&*node)) { - if (operation->GetCode() == operation_code) { - return {std::move(node), cursor}; - } - } - - if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { - const auto& conditional_code = conditional->GetCode(); - auto result = FindOperation( - conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); - auto& found = result.first; - if (found) { - return {std::move(found), cursor}; - } - } - } - return {}; -} - -std::optional<std::pair<Node, Node>> DecoupleIndirectRead(const OperationNode& operation) { - if (operation.GetCode() != OperationCode::UAdd) { - return std::nullopt; - } - Node gpr; - Node offset; - ASSERT(operation.GetOperandsCount() == 2); - for (std::size_t i = 0; i < operation.GetOperandsCount(); i++) { - Node operand = operation[i]; - if (std::holds_alternative<ImmediateNode>(*operand)) { - offset = operation[i]; - } else if (std::holds_alternative<GprNode>(*operand)) { - gpr = operation[i]; - } - } - if (offset && gpr) { - return std::make_pair(gpr, offset); - } - return std::nullopt; -} - -bool AmendNodeCv(std::size_t amend_index, Node node) { - if (const auto operation = std::get_if<OperationNode>(&*node)) { - operation->SetAmendIndex(amend_index); - return true; - } - if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { - 
conditional->SetAmendIndex(amend_index); - return true; - } - return false; -} - -} // Anonymous namespace - -std::pair<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code, - s64 cursor) { - if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { - const u32 cbuf_index = cbuf->GetIndex(); - - // Constant buffer found, test if it's an immediate - const auto& offset = cbuf->GetOffset(); - if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { - auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf_index, immediate->GetValue()); - return {tracked, track}; - } - if (const auto operation = std::get_if<OperationNode>(&*offset)) { - const u32 bound_buffer = registry.GetBoundBuffer(); - if (bound_buffer != cbuf_index) { - return {}; - } - if (const std::optional pair = DecoupleIndirectRead(*operation)) { - auto [gpr, base_offset] = *pair; - return HandleBindlessIndirectRead(*cbuf, *operation, gpr, base_offset, tracked, - code, cursor); - } - } - return {}; - } - if (const auto gpr = std::get_if<GprNode>(&*tracked)) { - if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { - return {}; - } - // Reduce the cursor in one to avoid infinite loops when the instruction sets the same - // register that it uses as operand - const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); - if (!source) { - return {}; - } - return TrackBindlessSampler(source, code, new_cursor); - } - if (const auto operation = std::get_if<OperationNode>(&*tracked)) { - const OperationNode& op = *operation; - - const OperationCode opcode = operation->GetCode(); - if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) { - ASSERT(op.GetOperandsCount() == 2); - auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor); - auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor); - if (node_a && node_b) { - auto track = MakeTrackSampler<SeparateSamplerNode>(std::pair{index_a, index_b}, - std::pair{offset_a, offset_b}); - return {tracked, std::move(track)}; - } - } - std::size_t i = op.GetOperandsCount(); - while (i--) { - if (auto found = TrackBindlessSampler(op[i - 1], code, cursor); std::get<0>(found)) { - // Constant buffer found in operand. 
- return found; - } - } - return {}; - } - if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { - const auto& conditional_code = conditional->GetCode(); - return TrackBindlessSampler(tracked, conditional_code, - static_cast<s64>(conditional_code.size())); - } - return {}; -} - -std::pair<Node, TrackSampler> ShaderIR::HandleBindlessIndirectRead( - const CbufNode& cbuf, const OperationNode& operation, Node gpr, Node base_offset, Node tracked, - const NodeBlock& code, s64 cursor) { - const auto offset_imm = std::get<ImmediateNode>(*base_offset); - const auto& gpu_driver = registry.AccessGuestDriverProfile(); - const u32 bindless_cv = NewCustomVariable(); - const u32 texture_handler_size = gpu_driver.GetTextureHandlerSize(); - Node op = Operation(OperationCode::UDiv, gpr, Immediate(texture_handler_size)); - - Node cv_node = GetCustomVariable(bindless_cv); - Node amend_op = Operation(OperationCode::Assign, std::move(cv_node), std::move(op)); - const std::size_t amend_index = DeclareAmend(std::move(amend_op)); - AmendNodeCv(amend_index, code[cursor]); - - // TODO: Implement bindless index custom variable - auto track = - MakeTrackSampler<ArraySamplerNode>(cbuf.GetIndex(), offset_imm.GetValue(), bindless_cv); - return {tracked, track}; -} - -std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, - s64 cursor) const { - if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { - // Constant buffer found, test if it's an immediate - const auto& offset = cbuf->GetOffset(); - if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { - return {tracked, cbuf->GetIndex(), immediate->GetValue()}; - } - return {}; - } - if (const auto gpr = std::get_if<GprNode>(&*tracked)) { - if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { - return {}; - } - // Reduce the cursor in one to avoid infinite loops when the instruction sets the same - // register that it uses as operand - const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); - if (!source) { - return {}; - } - return TrackCbuf(source, code, new_cursor); - } - if (const auto operation = std::get_if<OperationNode>(&*tracked)) { - for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { - if (auto found = TrackCbuf((*operation)[i - 1], code, cursor); std::get<0>(found)) { - // Cbuf found in operand. 
- return found; - } - } - return {}; - } - if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { - const auto& conditional_code = conditional->GetCode(); - return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size())); - } - return {}; -} - -std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const { - // Reduce the cursor in one to avoid infinite loops when the instruction sets the same register - // that it uses as operand - const auto result = TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1); - const auto& found = result.first; - if (!found) { - return std::nullopt; - } - if (const auto immediate = std::get_if<ImmediateNode>(&*found)) { - return immediate->GetValue(); - } - return std::nullopt; -} - -std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code, - s64 cursor) const { - for (; cursor >= 0; --cursor) { - const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign); - if (!found_node) { - return {}; - } - const auto operation = std::get_if<OperationNode>(&*found_node); - ASSERT(operation); - - const auto& target = (*operation)[0]; - if (const auto gpr_target = std::get_if<GprNode>(&*target)) { - if (gpr_target->GetIndex() == tracked->GetIndex()) { - return {(*operation)[1], new_cursor}; - } - } - } - return {}; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/transform_feedback.cpp b/src/video_core/shader/transform_feedback.cpp deleted file mode 100644 index 22a933761..000000000 --- a/src/video_core/shader/transform_feedback.cpp +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include <algorithm> -#include <array> -#include <unordered_map> - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/transform_feedback.h" - -namespace VideoCommon::Shader { - -namespace { - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; - -// TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20 - -/// Attribute offsets that describe a vector -constexpr std::array VECTORS = { - 28, // gl_Position - 32, // Generic 0 - 36, // Generic 1 - 40, // Generic 2 - 44, // Generic 3 - 48, // Generic 4 - 52, // Generic 5 - 56, // Generic 6 - 60, // Generic 7 - 64, // Generic 8 - 68, // Generic 9 - 72, // Generic 10 - 76, // Generic 11 - 80, // Generic 12 - 84, // Generic 13 - 88, // Generic 14 - 92, // Generic 15 - 96, // Generic 16 - 100, // Generic 17 - 104, // Generic 18 - 108, // Generic 19 - 112, // Generic 20 - 116, // Generic 21 - 120, // Generic 22 - 124, // Generic 23 - 128, // Generic 24 - 132, // Generic 25 - 136, // Generic 26 - 140, // Generic 27 - 144, // Generic 28 - 148, // Generic 29 - 152, // Generic 30 - 156, // Generic 31 - 160, // gl_FrontColor - 164, // gl_FrontSecondaryColor - 160, // gl_BackColor - 164, // gl_BackSecondaryColor - 192, // gl_TexCoord[0] - 196, // gl_TexCoord[1] - 200, // gl_TexCoord[2] - 204, // gl_TexCoord[3] - 208, // gl_TexCoord[4] - 212, // gl_TexCoord[5] - 216, // gl_TexCoord[6] - 220, // gl_TexCoord[7] -}; -} // namespace - -std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info) { - - std::unordered_map<u8, VaryingTFB> tfb; - - for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { - const auto& locations = info.tfb_varying_locs[buffer]; - const auto& layout = info.tfb_layouts[buffer]; - const std::size_t varying_count = layout.varying_count; - - std::size_t highest = 0; - - for (std::size_t offset = 0; offset < varying_count; ++offset) { - const std::size_t base_offset = offset; - const u8 location = locations[offset]; - - VaryingTFB varying; - varying.buffer = layout.stream; - varying.stride = layout.stride; - varying.offset = offset * sizeof(u32); - varying.components = 1; - - if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) { - UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); - - const u8 base_index = location / 4; - while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { - ++offset; - ++varying.components; - } - } - - [[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second; - UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored"); - - highest = std::max(highest, (base_offset + varying.components) * sizeof(u32)); - } - - UNIMPLEMENTED_IF(highest != layout.stride); - } - return tfb; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/transform_feedback.h b/src/video_core/shader/transform_feedback.h deleted file mode 100644 index 77d05f64c..000000000 --- a/src/video_core/shader/transform_feedback.h +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include <unordered_map> - -#include "common/common_types.h" -#include "video_core/shader/registry.h" - -namespace VideoCommon::Shader { - -struct VaryingTFB { - std::size_t buffer; - std::size_t stride; - std::size_t offset; - std::size_t components; -}; - -std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info); - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp new file mode 100644 index 000000000..78bf90c48 --- /dev/null +++ b/src/video_core/shader_cache.cpp @@ -0,0 +1,250 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <array> +#include <vector> + +#include "common/assert.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/object_pool.h" +#include "video_core/dirty_flags.h" +#include "video_core/engines/kepler_compute.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" +#include "video_core/shader_cache.h" +#include "video_core/shader_environment.h" + +namespace VideoCommon { + +void ShaderCache::InvalidateRegion(VAddr addr, size_t size) { + std::scoped_lock lock{invalidation_mutex}; + InvalidatePagesInRegion(addr, size); + RemovePendingShaders(); +} + +void ShaderCache::OnCPUWrite(VAddr addr, size_t size) { + std::lock_guard lock{invalidation_mutex}; + InvalidatePagesInRegion(addr, size); +} + +void ShaderCache::SyncGuestHost() { + std::scoped_lock lock{invalidation_mutex}; + RemovePendingShaders(); +} + +ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_, + Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_) + : gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, + rasterizer{rasterizer_} {} + +bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) { + auto& dirty{maxwell3d.dirty.flags}; + if (!dirty[VideoCommon::Dirty::Shaders]) { + return last_shaders_valid; + } + dirty[VideoCommon::Dirty::Shaders] = false; + + const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; + for (size_t index = 0; index < Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; ++index) { + if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { + unique_hashes[index] = 0; + continue; + } + const auto& shader_config{maxwell3d.regs.shader_config[index]}; + const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)}; + const GPUVAddr shader_addr{base_addr + shader_config.offset}; + const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; + if (!cpu_shader_addr) { + LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr); + last_shaders_valid = false; + return false; + } + const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; + if (!shader_info) { + const u32 start_address{shader_config.offset}; + GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; + shader_info = MakeShaderInfo(env, *cpu_shader_addr); + } + shader_infos[index] = shader_info; + unique_hashes[index] = shader_info->unique_hash; + } + last_shaders_valid = true; + return true; +} + +const ShaderInfo* ShaderCache::ComputeShader() { + const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; + const auto& qmd{kepler_compute.launch_description}; + const GPUVAddr shader_addr{program_base + 
qmd.program_start}; + const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; + if (!cpu_shader_addr) { + LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr); + return nullptr; + } + if (const ShaderInfo* const shader = TryGet(*cpu_shader_addr)) { + return shader; + } + ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; + return MakeShaderInfo(env, *cpu_shader_addr); +} + +void ShaderCache::GetGraphicsEnvironments(GraphicsEnvironments& result, + const std::array<u64, NUM_PROGRAMS>& unique_hashes) { + size_t env_index{}; + const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; + for (size_t index = 0; index < NUM_PROGRAMS; ++index) { + if (unique_hashes[index] == 0) { + continue; + } + const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)}; + auto& env{result.envs[index]}; + const u32 start_address{maxwell3d.regs.shader_config[index].offset}; + env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; + env.SetCachedSize(shader_infos[index]->size_bytes); + result.env_ptrs[env_index++] = &env; + } +} + +ShaderInfo* ShaderCache::TryGet(VAddr addr) const { + std::scoped_lock lock{lookup_mutex}; + + const auto it = lookup_cache.find(addr); + if (it == lookup_cache.end()) { + return nullptr; + } + return it->second->data; +} + +void ShaderCache::Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t size) { + std::scoped_lock lock{invalidation_mutex, lookup_mutex}; + + const VAddr addr_end = addr + size; + Entry* const entry = NewEntry(addr, addr_end, data.get()); + + const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; + for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { + invalidation_cache[page].push_back(entry); + } + + storage.push_back(std::move(data)); + + rasterizer.UpdatePagesCachedCount(addr, size, 1); +} + +void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) { + const VAddr addr_end = addr + size; + const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; + for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { + auto it = invalidation_cache.find(page); + if (it == invalidation_cache.end()) { + continue; + } + InvalidatePageEntries(it->second, addr, addr_end); + } +} + +void ShaderCache::RemovePendingShaders() { + if (marked_for_removal.empty()) { + return; + } + // Remove duplicates + std::ranges::sort(marked_for_removal); + marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()), + marked_for_removal.end()); + + std::vector<ShaderInfo*> removed_shaders; + removed_shaders.reserve(marked_for_removal.size()); + + std::scoped_lock lock{lookup_mutex}; + + for (Entry* const entry : marked_for_removal) { + removed_shaders.push_back(entry->data); + + const auto it = lookup_cache.find(entry->addr_start); + ASSERT(it != lookup_cache.end()); + lookup_cache.erase(it); + } + marked_for_removal.clear(); + + if (!removed_shaders.empty()) { + RemoveShadersFromStorage(std::move(removed_shaders)); + } +} + +void ShaderCache::InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) { + size_t index = 0; + while (index < entries.size()) { + Entry* const entry = entries[index]; + if (!entry->Overlaps(addr, addr_end)) { + ++index; + continue; + } + + UnmarkMemory(entry); + RemoveEntryFromInvalidationCache(entry); + marked_for_removal.push_back(entry); + } +} + +void ShaderCache::RemoveEntryFromInvalidationCache(const Entry* entry) { + 
const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS; + for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) { + const auto entries_it = invalidation_cache.find(page); + ASSERT(entries_it != invalidation_cache.end()); + std::vector<Entry*>& entries = entries_it->second; + + const auto entry_it = std::ranges::find(entries, entry); + ASSERT(entry_it != entries.end()); + entries.erase(entry_it); + } +} + +void ShaderCache::UnmarkMemory(Entry* entry) { + if (!entry->is_memory_marked) { + return; + } + entry->is_memory_marked = false; + + const VAddr addr = entry->addr_start; + const size_t size = entry->addr_end - addr; + rasterizer.UpdatePagesCachedCount(addr, size, -1); +} + +void ShaderCache::RemoveShadersFromStorage(std::vector<ShaderInfo*> removed_shaders) { + // Remove them from the cache + std::erase_if(storage, [&removed_shaders](const std::unique_ptr<ShaderInfo>& shader) { + return std::ranges::find(removed_shaders, shader.get()) != removed_shaders.end(); + }); +} + +ShaderCache::Entry* ShaderCache::NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data) { + auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data}); + Entry* const entry_pointer = entry.get(); + + lookup_cache.emplace(addr, std::move(entry)); + return entry_pointer; +} + +const ShaderInfo* ShaderCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) { + auto info = std::make_unique<ShaderInfo>(); + if (const std::optional<u64> cached_hash{env.Analyze()}) { + info->unique_hash = *cached_hash; + info->size_bytes = env.CachedSize(); + } else { + // Slow path, not really hit on commercial games + // Build a control flow graph to get the real shader size + Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block; + Shader::Maxwell::Flow::CFG cfg{env, flow_block, env.StartAddress()}; + info->unique_hash = env.CalculateHash(); + info->size_bytes = env.ReadSize(); + } + const size_t size_bytes{info->size_bytes}; + const ShaderInfo* const result{info.get()}; + Register(std::move(info), cpu_addr, size_bytes); + return result; +} + +} // namespace VideoCommon diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h index 015a789d6..136fe294c 100644 --- a/src/video_core/shader_cache.h +++ b/src/video_core/shader_cache.h @@ -5,226 +5,147 @@ #pragma once #include <algorithm> +#include <array> #include <memory> #include <mutex> +#include <span> #include <unordered_map> #include <utility> #include <vector> -#include "common/assert.h" #include "common/common_types.h" #include "video_core/rasterizer_interface.h" +#include "video_core/shader_environment.h" + +namespace Tegra { +class MemoryManager; +} namespace VideoCommon { -template <class T> +class GenericEnvironment; + +struct ShaderInfo { + u64 unique_hash{}; + size_t size_bytes{}; +}; + class ShaderCache { static constexpr u64 PAGE_BITS = 14; static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS; + static constexpr size_t NUM_PROGRAMS = 6; + struct Entry { VAddr addr_start; VAddr addr_end; - T* data; + ShaderInfo* data; bool is_memory_marked = true; - constexpr bool Overlaps(VAddr start, VAddr end) const noexcept { + bool Overlaps(VAddr start, VAddr end) const noexcept { return start < addr_end && addr_start < end; } }; public: - virtual ~ShaderCache() = default; - /// @brief Removes shaders inside a given region /// @note Checks for ranges /// @param addr Start address of the invalidation /// @param size Number of bytes of the invalidation - void InvalidateRegion(VAddr addr, std::size_t size) { - 
std::scoped_lock lock{invalidation_mutex}; - InvalidatePagesInRegion(addr, size); - RemovePendingShaders(); - } + void InvalidateRegion(VAddr addr, size_t size); /// @brief Unmarks a memory region as cached and marks it for removal /// @param addr Start address of the CPU write operation /// @param size Number of bytes of the CPU write operation - void OnCPUWrite(VAddr addr, std::size_t size) { - std::lock_guard lock{invalidation_mutex}; - InvalidatePagesInRegion(addr, size); - } + void OnCPUWrite(VAddr addr, size_t size); /// @brief Flushes delayed removal operations - void SyncGuestHost() { - std::scoped_lock lock{invalidation_mutex}; - RemovePendingShaders(); - } + void SyncGuestHost(); - /// @brief Tries to obtain a cached shader starting in a given address - /// @note Doesn't check for ranges, the given address has to be the start of the shader - /// @param addr Start address of the shader, this doesn't cache for region - /// @return Pointer to a valid shader, nullptr when nothing is found - T* TryGet(VAddr addr) const { - std::scoped_lock lock{lookup_mutex}; +protected: + struct GraphicsEnvironments { + std::array<GraphicsEnvironment, NUM_PROGRAMS> envs; + std::array<Shader::Environment*, NUM_PROGRAMS> env_ptrs; - const auto it = lookup_cache.find(addr); - if (it == lookup_cache.end()) { - return nullptr; + std::span<Shader::Environment* const> Span() const noexcept { + return std::span(env_ptrs.begin(), std::ranges::find(env_ptrs, nullptr)); } - return it->second->data; - } - -protected: - explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {} + }; - /// @brief Register in the cache a given entry - /// @param data Shader to store in the cache - /// @param addr Start address of the shader that will be registered - /// @param size Size in bytes of the shader - void Register(std::unique_ptr<T> data, VAddr addr, std::size_t size) { - std::scoped_lock lock{invalidation_mutex, lookup_mutex}; + explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_, + Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_); - const VAddr addr_end = addr + size; - Entry* const entry = NewEntry(addr, addr_end, data.get()); + /// @brief Update the hashes and information of shader stages + /// @param unique_hashes Shader hashes to store into when a stage is enabled + /// @return True no success, false on error + bool RefreshStages(std::array<u64, NUM_PROGRAMS>& unique_hashes); - const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; - for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { - invalidation_cache[page].push_back(entry); - } + /// @brief Returns information about the current compute shader + /// @return Pointer to a valid shader, nullptr on error + const ShaderInfo* ComputeShader(); - storage.push_back(std::move(data)); + /// @brief Collect the current graphics environments + void GetGraphicsEnvironments(GraphicsEnvironments& result, + const std::array<u64, NUM_PROGRAMS>& unique_hashes); - rasterizer.UpdatePagesCachedCount(addr, size, 1); - } + Tegra::MemoryManager& gpu_memory; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::Engines::KeplerCompute& kepler_compute; - /// @brief Called when a shader is going to be removed - /// @param shader Shader that will be removed - /// @pre invalidation_cache is locked - /// @pre lookup_mutex is locked - virtual void OnShaderRemoval([[maybe_unused]] T* shader) {} + std::array<const ShaderInfo*, NUM_PROGRAMS> shader_infos{}; + bool 
last_shaders_valid = false; private: + /// @brief Tries to obtain a cached shader starting in a given address + /// @note Doesn't check for ranges, the given address has to be the start of the shader + /// @param addr Start address of the shader, this doesn't cache for region + /// @return Pointer to a valid shader, nullptr when nothing is found + ShaderInfo* TryGet(VAddr addr) const; + + /// @brief Register in the cache a given entry + /// @param data Shader to store in the cache + /// @param addr Start address of the shader that will be registered + /// @param size Size in bytes of the shader + void Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t size); + /// @brief Invalidate pages in a given region /// @pre invalidation_mutex is locked - void InvalidatePagesInRegion(VAddr addr, std::size_t size) { - const VAddr addr_end = addr + size; - const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; - for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { - auto it = invalidation_cache.find(page); - if (it == invalidation_cache.end()) { - continue; - } - InvalidatePageEntries(it->second, addr, addr_end); - } - } + void InvalidatePagesInRegion(VAddr addr, size_t size); /// @brief Remove shaders marked for deletion /// @pre invalidation_mutex is locked - void RemovePendingShaders() { - if (marked_for_removal.empty()) { - return; - } - // Remove duplicates - std::sort(marked_for_removal.begin(), marked_for_removal.end()); - marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()), - marked_for_removal.end()); - - std::vector<T*> removed_shaders; - removed_shaders.reserve(marked_for_removal.size()); - - std::scoped_lock lock{lookup_mutex}; - - for (Entry* const entry : marked_for_removal) { - removed_shaders.push_back(entry->data); - - const auto it = lookup_cache.find(entry->addr_start); - ASSERT(it != lookup_cache.end()); - lookup_cache.erase(it); - } - marked_for_removal.clear(); - - if (!removed_shaders.empty()) { - RemoveShadersFromStorage(std::move(removed_shaders)); - } - } + void RemovePendingShaders(); /// @brief Invalidates entries in a given range for the passed page /// @param entries Vector of entries in the page, it will be modified on overlaps /// @param addr Start address of the invalidation /// @param addr_end Non-inclusive end address of the invalidation /// @pre invalidation_mutex is locked - void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) { - std::size_t index = 0; - while (index < entries.size()) { - Entry* const entry = entries[index]; - if (!entry->Overlaps(addr, addr_end)) { - ++index; - continue; - } - - UnmarkMemory(entry); - RemoveEntryFromInvalidationCache(entry); - marked_for_removal.push_back(entry); - } - } + void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end); /// @brief Removes all references to an entry in the invalidation cache /// @param entry Entry to remove from the invalidation cache /// @pre invalidation_mutex is locked - void RemoveEntryFromInvalidationCache(const Entry* entry) { - const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS; - for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) { - const auto entries_it = invalidation_cache.find(page); - ASSERT(entries_it != invalidation_cache.end()); - std::vector<Entry*>& entries = entries_it->second; - - const auto entry_it = std::find(entries.begin(), entries.end(), entry); - ASSERT(entry_it != entries.end()); - entries.erase(entry_it); - } - } + 
void RemoveEntryFromInvalidationCache(const Entry* entry); /// @brief Unmarks an entry from the rasterizer cache /// @param entry Entry to unmark from memory - void UnmarkMemory(Entry* entry) { - if (!entry->is_memory_marked) { - return; - } - entry->is_memory_marked = false; - - const VAddr addr = entry->addr_start; - const std::size_t size = entry->addr_end - addr; - rasterizer.UpdatePagesCachedCount(addr, size, -1); - } + void UnmarkMemory(Entry* entry); /// @brief Removes a vector of shaders from a list /// @param removed_shaders Shaders to be removed from the storage /// @pre invalidation_mutex is locked /// @pre lookup_mutex is locked - void RemoveShadersFromStorage(std::vector<T*> removed_shaders) { - // Notify removals - for (T* const shader : removed_shaders) { - OnShaderRemoval(shader); - } - - // Remove them from the cache - const auto is_removed = [&removed_shaders](const std::unique_ptr<T>& shader) { - return std::find(removed_shaders.begin(), removed_shaders.end(), shader.get()) != - removed_shaders.end(); - }; - std::erase_if(storage, is_removed); - } + void RemoveShadersFromStorage(std::vector<ShaderInfo*> removed_shaders); /// @brief Creates a new entry in the lookup cache and returns its pointer /// @pre lookup_mutex is locked - Entry* NewEntry(VAddr addr, VAddr addr_end, T* data) { - auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data}); - Entry* const entry_pointer = entry.get(); + Entry* NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data); - lookup_cache.emplace(addr, std::move(entry)); - return entry_pointer; - } + /// @brief Create a new shader entry and register it + const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); VideoCore::RasterizerInterface& rasterizer; @@ -233,7 +154,7 @@ private: std::unordered_map<u64, std::unique_ptr<Entry>> lookup_cache; std::unordered_map<u64, std::vector<Entry*>> invalidation_cache; - std::vector<std::unique_ptr<T>> storage; + std::vector<std::unique_ptr<ShaderInfo>> storage; std::vector<Entry*> marked_for_removal; }; diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp new file mode 100644 index 000000000..8a4581c19 --- /dev/null +++ b/src/video_core/shader_environment.cpp @@ -0,0 +1,460 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <filesystem> +#include <fstream> +#include <memory> +#include <optional> +#include <utility> + +#include "common/assert.h" +#include "common/cityhash.h" +#include "common/common_types.h" +#include "common/div_ceil.h" +#include "common/fs/fs.h" +#include "common/logging/log.h" +#include "shader_recompiler/environment.h" +#include "video_core/memory_manager.h" +#include "video_core/shader_environment.h" +#include "video_core/textures/texture.h" + +namespace VideoCommon { + +constexpr std::array<char, 8> MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; + +constexpr size_t INST_SIZE = sizeof(u64); + +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + +static u64 MakeCbufKey(u32 index, u32 offset) { + return (static_cast<u64>(index) << 32) | offset; +} + +static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) { + switch (entry.texture_type) { + case Tegra::Texture::TextureType::Texture1D: + return Shader::TextureType::Color1D; + case Tegra::Texture::TextureType::Texture2D: + case Tegra::Texture::TextureType::Texture2DNoMipmap: + return Shader::TextureType::Color2D; + case Tegra::Texture::TextureType::Texture3D: + return Shader::TextureType::Color3D; + case Tegra::Texture::TextureType::TextureCubemap: + return Shader::TextureType::ColorCube; + case Tegra::Texture::TextureType::Texture1DArray: + return Shader::TextureType::ColorArray1D; + case Tegra::Texture::TextureType::Texture2DArray: + return Shader::TextureType::ColorArray2D; + case Tegra::Texture::TextureType::Texture1DBuffer: + return Shader::TextureType::Buffer; + case Tegra::Texture::TextureType::TextureCubeArray: + return Shader::TextureType::ColorArrayCube; + default: + throw Shader::NotImplementedException("Unknown texture type"); + } +} + +GenericEnvironment::GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_) + : gpu_memory{&gpu_memory_}, program_base{program_base_} { + start_address = start_address_; +} + +GenericEnvironment::~GenericEnvironment() = default; + +u32 GenericEnvironment::TextureBoundBuffer() const { + return texture_bound; +} + +u32 GenericEnvironment::LocalMemorySize() const { + return local_memory_size; +} + +u32 GenericEnvironment::SharedMemorySize() const { + return shared_memory_size; +} + +std::array<u32, 3> GenericEnvironment::WorkgroupSize() const { + return workgroup_size; +} + +u64 GenericEnvironment::ReadInstruction(u32 address) { + read_lowest = std::min(read_lowest, address); + read_highest = std::max(read_highest, address); + + if (address >= cached_lowest && address < cached_highest) { + return code[(address - cached_lowest) / INST_SIZE]; + } + has_unbound_instructions = true; + return gpu_memory->Read<u64>(program_base + address); +} + +std::optional<u64> GenericEnvironment::Analyze() { + const std::optional<u64> size{TryFindSize()}; + if (!size) { + return std::nullopt; + } + cached_lowest = start_address; + cached_highest = start_address + static_cast<u32>(*size); + return Common::CityHash64(reinterpret_cast<const char*>(code.data()), *size); +} + +void GenericEnvironment::SetCachedSize(size_t size_bytes) { + cached_lowest = start_address; + cached_highest = start_address + static_cast<u32>(size_bytes); + code.resize(CachedSize()); + gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64)); +} + +size_t GenericEnvironment::CachedSize() const noexcept { + return cached_highest - cached_lowest + INST_SIZE; +} + +size_t GenericEnvironment::ReadSize() const noexcept { + return 
read_highest - read_lowest + INST_SIZE; +} + +bool GenericEnvironment::CanBeSerialized() const noexcept { + return !has_unbound_instructions; +} + +u64 GenericEnvironment::CalculateHash() const { + const size_t size{ReadSize()}; + const auto data{std::make_unique<char[]>(size)}; + gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size); + return Common::CityHash64(data.get(), size); +} + +void GenericEnvironment::Serialize(std::ofstream& file) const { + const u64 code_size{static_cast<u64>(CachedSize())}; + const u64 num_texture_types{static_cast<u64>(texture_types.size())}; + const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())}; + + file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size)) + .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types)) + .write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values)) + .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size)) + .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound)) + .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address)) + .write(reinterpret_cast<const char*>(&cached_lowest), sizeof(cached_lowest)) + .write(reinterpret_cast<const char*>(&cached_highest), sizeof(cached_highest)) + .write(reinterpret_cast<const char*>(&stage), sizeof(stage)) + .write(reinterpret_cast<const char*>(code.data()), code_size); + for (const auto [key, type] : texture_types) { + file.write(reinterpret_cast<const char*>(&key), sizeof(key)) + .write(reinterpret_cast<const char*>(&type), sizeof(type)); + } + for (const auto [key, type] : cbuf_values) { + file.write(reinterpret_cast<const char*>(&key), sizeof(key)) + .write(reinterpret_cast<const char*>(&type), sizeof(type)); + } + if (stage == Shader::Stage::Compute) { + file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size)) + .write(reinterpret_cast<const char*>(&shared_memory_size), sizeof(shared_memory_size)); + } else { + file.write(reinterpret_cast<const char*>(&sph), sizeof(sph)); + if (stage == Shader::Stage::Geometry) { + file.write(reinterpret_cast<const char*>(&gp_passthrough_mask), + sizeof(gp_passthrough_mask)); + } + } +} + +std::optional<u64> GenericEnvironment::TryFindSize() { + static constexpr size_t BLOCK_SIZE = 0x1000; + static constexpr size_t MAXIMUM_SIZE = 0x100000; + + static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; + static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; + + GPUVAddr guest_addr{program_base + start_address}; + size_t offset{0}; + size_t size{BLOCK_SIZE}; + while (size <= MAXIMUM_SIZE) { + code.resize(size / INST_SIZE); + u64* const data = code.data() + offset / INST_SIZE; + gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE); + for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) { + const u64 inst = data[index / INST_SIZE]; + if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { + return offset + index; + } + } + guest_addr += BLOCK_SIZE; + size += BLOCK_SIZE; + offset += BLOCK_SIZE; + } + return std::nullopt; +} + +Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, + bool via_header_index, u32 raw) { + const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)}; + const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)}; + Tegra::Texture::TICEntry entry; + gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); + const Shader::TextureType result{ConvertType(entry)}; 
+ texture_types.emplace(raw, result); + return result; +} + +GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_, + Maxwell::ShaderProgram program, GPUVAddr program_base_, + u32 start_address_) + : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} { + gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph)); + gp_passthrough_mask = maxwell3d->regs.gp_passthrough_mask; + switch (program) { + case Maxwell::ShaderProgram::VertexA: + stage = Shader::Stage::VertexA; + stage_index = 0; + break; + case Maxwell::ShaderProgram::VertexB: + stage = Shader::Stage::VertexB; + stage_index = 0; + break; + case Maxwell::ShaderProgram::TesselationControl: + stage = Shader::Stage::TessellationControl; + stage_index = 1; + break; + case Maxwell::ShaderProgram::TesselationEval: + stage = Shader::Stage::TessellationEval; + stage_index = 2; + break; + case Maxwell::ShaderProgram::Geometry: + stage = Shader::Stage::Geometry; + stage_index = 3; + break; + case Maxwell::ShaderProgram::Fragment: + stage = Shader::Stage::Fragment; + stage_index = 4; + break; + default: + UNREACHABLE_MSG("Invalid program={}", program); + break; + } + const u64 local_size{sph.LocalMemorySize()}; + ASSERT(local_size <= std::numeric_limits<u32>::max()); + local_memory_size = static_cast<u32>(local_size) + sph.common3.shader_local_memory_crs_size; + texture_bound = maxwell3d->regs.tex_cb_index; +} + +u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { + const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; + ASSERT(cbuf.enabled); + u32 value{}; + if (cbuf_offset < cbuf.size) { + value = gpu_memory->Read<u32>(cbuf.address + cbuf_offset); + } + cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); + return value; +} + +Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) { + const auto& regs{maxwell3d->regs}; + const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; + return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle); +} + +ComputeEnvironment::ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_) + : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{ + &kepler_compute_} { + const auto& qmd{kepler_compute->launch_description}; + stage = Shader::Stage::Compute; + local_memory_size = qmd.local_pos_alloc + qmd.local_crs_alloc; + texture_bound = kepler_compute->regs.tex_cb_index; + shared_memory_size = qmd.shared_alloc; + workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; +} + +u32 ComputeEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { + const auto& qmd{kepler_compute->launch_description}; + ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0); + const auto& cbuf{qmd.const_buffer_config[cbuf_index]}; + u32 value{}; + if (cbuf_offset < cbuf.size) { + value = gpu_memory->Read<u32>(cbuf.Address() + cbuf_offset); + } + cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); + return value; +} + +Shader::TextureType ComputeEnvironment::ReadTextureType(u32 handle) { + const auto& regs{kepler_compute->regs}; + const auto& qmd{kepler_compute->launch_description}; + return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle); +} + +void 
FileEnvironment::Deserialize(std::ifstream& file) { + u64 code_size{}; + u64 num_texture_types{}; + u64 num_cbuf_values{}; + file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size)) + .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types)) + .read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values)) + .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size)) + .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound)) + .read(reinterpret_cast<char*>(&start_address), sizeof(start_address)) + .read(reinterpret_cast<char*>(&read_lowest), sizeof(read_lowest)) + .read(reinterpret_cast<char*>(&read_highest), sizeof(read_highest)) + .read(reinterpret_cast<char*>(&stage), sizeof(stage)); + code = std::make_unique<u64[]>(Common::DivCeil(code_size, sizeof(u64))); + file.read(reinterpret_cast<char*>(code.get()), code_size); + for (size_t i = 0; i < num_texture_types; ++i) { + u32 key; + Shader::TextureType type; + file.read(reinterpret_cast<char*>(&key), sizeof(key)) + .read(reinterpret_cast<char*>(&type), sizeof(type)); + texture_types.emplace(key, type); + } + for (size_t i = 0; i < num_cbuf_values; ++i) { + u64 key; + u32 value; + file.read(reinterpret_cast<char*>(&key), sizeof(key)) + .read(reinterpret_cast<char*>(&value), sizeof(value)); + cbuf_values.emplace(key, value); + } + if (stage == Shader::Stage::Compute) { + file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size)) + .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size)); + } else { + file.read(reinterpret_cast<char*>(&sph), sizeof(sph)); + if (stage == Shader::Stage::Geometry) { + file.read(reinterpret_cast<char*>(&gp_passthrough_mask), sizeof(gp_passthrough_mask)); + } + } +} + +u64 FileEnvironment::ReadInstruction(u32 address) { + if (address < read_lowest || address > read_highest) { + throw Shader::LogicError("Out of bounds address {}", address); + } + return code[(address - read_lowest) / sizeof(u64)]; +} + +u32 FileEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { + const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))}; + if (it == cbuf_values.end()) { + throw Shader::LogicError("Uncached read texture type"); + } + return it->second; +} + +Shader::TextureType FileEnvironment::ReadTextureType(u32 handle) { + const auto it{texture_types.find(handle)}; + if (it == texture_types.end()) { + throw Shader::LogicError("Uncached read texture type"); + } + return it->second; +} + +u32 FileEnvironment::LocalMemorySize() const { + return local_memory_size; +} + +u32 FileEnvironment::SharedMemorySize() const { + return shared_memory_size; +} + +u32 FileEnvironment::TextureBoundBuffer() const { + return texture_bound; +} + +std::array<u32, 3> FileEnvironment::WorkgroupSize() const { + return workgroup_size; +} + +void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs, + const std::filesystem::path& filename, u32 cache_version) try { + std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app); + file.exceptions(std::ifstream::failbit); + if (!file.is_open()) { + LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}", + Common::FS::PathToUTF8String(filename)); + return; + } + if (file.tellp() == 0) { + // Write header + file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size()) + .write(reinterpret_cast<const char*>(&cache_version), sizeof(cache_version)); + } + if (!std::ranges::all_of(envs, 
&GenericEnvironment::CanBeSerialized)) { + return; + } + const u32 num_envs{static_cast<u32>(envs.size())}; + file.write(reinterpret_cast<const char*>(&num_envs), sizeof(num_envs)); + for (const GenericEnvironment* const env : envs) { + env->Serialize(file); + } + file.write(key.data(), key.size_bytes()); + +} catch (const std::ios_base::failure& e) { + LOG_ERROR(Common_Filesystem, "{}", e.what()); + if (!Common::FS::RemoveFile(filename)) { + LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", + Common::FS::PathToUTF8String(filename)); + } +} + +void LoadPipelines( + std::stop_token stop_loading, const std::filesystem::path& filename, u32 expected_cache_version, + Common::UniqueFunction<void, std::ifstream&, FileEnvironment> load_compute, + Common::UniqueFunction<void, std::ifstream&, std::vector<FileEnvironment>> load_graphics) try { + std::ifstream file(filename, std::ios::binary | std::ios::ate); + if (!file.is_open()) { + return; + } + file.exceptions(std::ifstream::failbit); + const auto end{file.tellg()}; + file.seekg(0, std::ios::beg); + + std::array<char, 8> magic_number; + u32 cache_version; + file.read(magic_number.data(), magic_number.size()) + .read(reinterpret_cast<char*>(&cache_version), sizeof(cache_version)); + if (magic_number != MAGIC_NUMBER || cache_version != expected_cache_version) { + file.close(); + if (Common::FS::RemoveFile(filename)) { + if (magic_number != MAGIC_NUMBER) { + LOG_ERROR(Common_Filesystem, "Invalid pipeline cache file"); + } + if (cache_version != expected_cache_version) { + LOG_INFO(Common_Filesystem, "Deleting old pipeline cache"); + } + } else { + LOG_ERROR(Common_Filesystem, + "Invalid pipeline cache file and failed to delete it in \"{}\"", + Common::FS::PathToUTF8String(filename)); + } + return; + } + while (file.tellg() != end) { + if (stop_loading.stop_requested()) { + return; + } + u32 num_envs{}; + file.read(reinterpret_cast<char*>(&num_envs), sizeof(num_envs)); + std::vector<FileEnvironment> envs(num_envs); + for (FileEnvironment& env : envs) { + env.Deserialize(file); + } + if (envs.front().ShaderStage() == Shader::Stage::Compute) { + load_compute(file, std::move(envs.front())); + } else { + load_graphics(file, std::move(envs)); + } + } + +} catch (const std::ios_base::failure& e) { + LOG_ERROR(Common_Filesystem, "{}", e.what()); + if (!Common::FS::RemoveFile(filename)) { + LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", + Common::FS::PathToUTF8String(filename)); + } +} + +} // namespace VideoCommon diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h new file mode 100644 index 000000000..2079979db --- /dev/null +++ b/src/video_core/shader_environment.h @@ -0,0 +1,183 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <array> +#include <atomic> +#include <filesystem> +#include <iosfwd> +#include <limits> +#include <memory> +#include <optional> +#include <span> +#include <type_traits> +#include <unordered_map> +#include <vector> + +#include "common/common_types.h" +#include "common/unique_function.h" +#include "shader_recompiler/environment.h" +#include "video_core/engines/kepler_compute.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/textures/texture.h" + +namespace Tegra { +class Memorymanager; +} + +namespace VideoCommon { + +class GenericEnvironment : public Shader::Environment { +public: + explicit GenericEnvironment() = default; + explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_); + + ~GenericEnvironment() override; + + [[nodiscard]] u32 TextureBoundBuffer() const final; + + [[nodiscard]] u32 LocalMemorySize() const final; + + [[nodiscard]] u32 SharedMemorySize() const final; + + [[nodiscard]] std::array<u32, 3> WorkgroupSize() const final; + + [[nodiscard]] u64 ReadInstruction(u32 address) final; + + [[nodiscard]] std::optional<u64> Analyze(); + + void SetCachedSize(size_t size_bytes); + + [[nodiscard]] size_t CachedSize() const noexcept; + + [[nodiscard]] size_t ReadSize() const noexcept; + + [[nodiscard]] bool CanBeSerialized() const noexcept; + + [[nodiscard]] u64 CalculateHash() const; + + void Serialize(std::ofstream& file) const; + +protected: + std::optional<u64> TryFindSize(); + + Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, + u32 raw); + + Tegra::MemoryManager* gpu_memory{}; + GPUVAddr program_base{}; + + std::vector<u64> code; + std::unordered_map<u32, Shader::TextureType> texture_types; + std::unordered_map<u64, u32> cbuf_values; + + u32 local_memory_size{}; + u32 texture_bound{}; + u32 shared_memory_size{}; + std::array<u32, 3> workgroup_size{}; + + u32 read_lowest = std::numeric_limits<u32>::max(); + u32 read_highest = 0; + + u32 cached_lowest = std::numeric_limits<u32>::max(); + u32 cached_highest = 0; + + bool has_unbound_instructions = false; +}; + +class GraphicsEnvironment final : public GenericEnvironment { +public: + explicit GraphicsEnvironment() = default; + explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::Maxwell3D::Regs::ShaderProgram program, + GPUVAddr program_base_, u32 start_address_); + + ~GraphicsEnvironment() override = default; + + u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override; + + Shader::TextureType ReadTextureType(u32 handle) override; + +private: + Tegra::Engines::Maxwell3D* maxwell3d{}; + size_t stage_index{}; +}; + +class ComputeEnvironment final : public GenericEnvironment { +public: + explicit ComputeEnvironment() = default; + explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_); + + ~ComputeEnvironment() override = default; + + u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override; + + Shader::TextureType ReadTextureType(u32 handle) override; + +private: + Tegra::Engines::KeplerCompute* kepler_compute{}; +}; + +class FileEnvironment final : public Shader::Environment { +public: + FileEnvironment() = default; + ~FileEnvironment() override = default; + + FileEnvironment& operator=(FileEnvironment&&) noexcept = default; + FileEnvironment(FileEnvironment&&) noexcept = default; + + FileEnvironment& 
operator=(const FileEnvironment&) = delete; + FileEnvironment(const FileEnvironment&) = delete; + + void Deserialize(std::ifstream& file); + + [[nodiscard]] u64 ReadInstruction(u32 address) override; + + [[nodiscard]] u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override; + + [[nodiscard]] Shader::TextureType ReadTextureType(u32 handle) override; + + [[nodiscard]] u32 LocalMemorySize() const override; + + [[nodiscard]] u32 SharedMemorySize() const override; + + [[nodiscard]] u32 TextureBoundBuffer() const override; + + [[nodiscard]] std::array<u32, 3> WorkgroupSize() const override; + +private: + std::unique_ptr<u64[]> code; + std::unordered_map<u32, Shader::TextureType> texture_types; + std::unordered_map<u64, u32> cbuf_values; + std::array<u32, 3> workgroup_size{}; + u32 local_memory_size{}; + u32 shared_memory_size{}; + u32 texture_bound{}; + u32 read_lowest{}; + u32 read_highest{}; +}; + +void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs, + const std::filesystem::path& filename, u32 cache_version); + +template <typename Key, typename Envs> +void SerializePipeline(const Key& key, const Envs& envs, const std::filesystem::path& filename, + u32 cache_version) { + static_assert(std::is_trivially_copyable_v<Key>); + static_assert(std::has_unique_object_representations_v<Key>); + SerializePipeline(std::span(reinterpret_cast<const char*>(&key), sizeof(key)), + std::span(envs.data(), envs.size()), filename, cache_version); +} + +void LoadPipelines( + std::stop_token stop_loading, const std::filesystem::path& filename, u32 expected_cache_version, + Common::UniqueFunction<void, std::ifstream&, FileEnvironment> load_compute, + Common::UniqueFunction<void, std::ifstream&, std::vector<FileEnvironment>> load_graphics); + +} // namespace VideoCommon diff --git a/src/video_core/shader_notify.cpp b/src/video_core/shader_notify.cpp index 693e47158..dc6995b46 100644 --- a/src/video_core/shader_notify.cpp +++ b/src/video_core/shader_notify.cpp @@ -2,42 +2,35 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include <mutex> +#include <atomic> +#include <chrono> +#include <optional> + #include "video_core/shader_notify.h" using namespace std::chrono_literals; namespace VideoCore { -namespace { -constexpr auto UPDATE_TICK = 32ms; -} - -ShaderNotify::ShaderNotify() = default; -ShaderNotify::~ShaderNotify() = default; -std::size_t ShaderNotify::GetShadersBuilding() { - const auto now = std::chrono::high_resolution_clock::now(); - const auto diff = now - last_update; - if (diff > UPDATE_TICK) { - std::shared_lock lock(mutex); - last_updated_count = accurate_count; +const auto TIME_TO_STOP_REPORTING = 2s; + +int ShaderNotify::ShadersBuilding() noexcept { + const int now_complete = num_complete.load(std::memory_order::relaxed); + const int now_building = num_building.load(std::memory_order::relaxed); + if (now_complete == now_building) { + const auto now = std::chrono::high_resolution_clock::now(); + if (completed && num_complete == num_when_completed) { + if (now - complete_time > TIME_TO_STOP_REPORTING) { + report_base = now_complete; + completed = false; + } + } else { + completed = true; + num_when_completed = num_complete; + complete_time = now; + } } - return last_updated_count; -} - -std::size_t ShaderNotify::GetShadersBuildingAccurate() { - std::shared_lock lock{mutex}; - return accurate_count; -} - -void ShaderNotify::MarkShaderComplete() { - std::unique_lock lock{mutex}; - accurate_count--; -} - -void ShaderNotify::MarkSharderBuilding() { - std::unique_lock lock{mutex}; - accurate_count++; + return now_building - report_base; } } // namespace VideoCore diff --git a/src/video_core/shader_notify.h b/src/video_core/shader_notify.h index a9c92d179..ad363bfb5 100644 --- a/src/video_core/shader_notify.h +++ b/src/video_core/shader_notify.h @@ -4,26 +4,30 @@ #pragma once +#include <atomic> #include <chrono> -#include <shared_mutex> -#include "common/common_types.h" +#include <optional> namespace VideoCore { class ShaderNotify { public: - ShaderNotify(); - ~ShaderNotify(); + [[nodiscard]] int ShadersBuilding() noexcept; - std::size_t GetShadersBuilding(); - std::size_t GetShadersBuildingAccurate(); + void MarkShaderComplete() noexcept { + ++num_complete; + } - void MarkShaderComplete(); - void MarkSharderBuilding(); + void MarkShaderBuilding() noexcept { + ++num_building; + } private: - std::size_t last_updated_count{}; - std::size_t accurate_count{}; - std::shared_mutex mutex; - std::chrono::high_resolution_clock::time_point last_update{}; + std::atomic_int num_building{}; + std::atomic_int num_complete{}; + int report_base{}; + + bool completed{}; + int num_when_completed{}; + std::chrono::high_resolution_clock::time_point complete_time; }; } // namespace VideoCore diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp index d10ba4ccd..249cc4d0f 100644 --- a/src/video_core/texture_cache/formatter.cpp +++ b/src/video_core/texture_cache/formatter.cpp @@ -43,7 +43,7 @@ std::string Name(const ImageBase& image) { return "Invalid"; } -std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> type) { +std::string Name(const ImageViewBase& image_view) { const u32 width = image_view.size.width; const u32 height = image_view.size.height; const u32 depth = image_view.size.depth; @@ -51,7 +51,7 @@ std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> t const u32 num_layers = image_view.range.extent.layers; const std::string level = num_levels > 1 ? 
fmt::format(":{}", num_levels) : ""; - switch (type.value_or(image_view.type)) { + switch (image_view.type) { case ImageViewType::e1D: return fmt::format("ImageView 1D {}{}", width, level); case ImageViewType::e2D: diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h index a48413983..c6cf0583f 100644 --- a/src/video_core/texture_cache/formatter.h +++ b/src/video_core/texture_cache/formatter.h @@ -255,8 +255,7 @@ struct RenderTargets; [[nodiscard]] std::string Name(const ImageBase& image); -[[nodiscard]] std::string Name(const ImageViewBase& image_view, - std::optional<ImageViewType> type = std::nullopt); +[[nodiscard]] std::string Name(const ImageViewBase& image_view); [[nodiscard]] std::string Name(const RenderTargets& render_targets); diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp index e8d632f9e..450becbeb 100644 --- a/src/video_core/texture_cache/image_view_base.cpp +++ b/src/video_core/texture_cache/image_view_base.cpp @@ -36,6 +36,15 @@ ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_i } } +ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info) + : format{info.format}, type{ImageViewType::Buffer}, size{ + .width = info.size.width, + .height = 1, + .depth = 1, + } { + ASSERT_MSG(view_info.type == ImageViewType::Buffer, "Expected texture buffer"); +} + ImageViewBase::ImageViewBase(const NullImageParams&) {} } // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h index 73954167e..903f715c5 100644 --- a/src/video_core/texture_cache/image_view_base.h +++ b/src/video_core/texture_cache/image_view_base.h @@ -27,6 +27,7 @@ DECLARE_ENUM_FLAG_OPERATORS(ImageViewFlagBits) struct ImageViewBase { explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, ImageId image_id); + explicit ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info); explicit ImageViewBase(const NullImageParams&); [[nodiscard]] bool IsBuffer() const noexcept { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 85ce06d56..f34c9d9ca 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -117,6 +117,9 @@ public: /// Return a reference to the given image view id [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept; + /// Mark an image as modified from the GPU + void MarkModification(ImageId id) noexcept; + /// Fill image_view_ids with the graphics images in indices void FillGraphicsImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids); @@ -527,6 +530,11 @@ typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept { } template <class P> +void TextureCache<P>::MarkModification(ImageId id) noexcept { + MarkModification(slot_images[id]); +} + +template <class P> void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids) { FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids); @@ -540,13 +548,13 @@ void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices, template <class P> typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) { - [[unlikely]] if (index > graphics_sampler_table.Limit()) { - LOG_ERROR(HW_GPU, "Invalid sampler index={}", index); + if (index > 
graphics_sampler_table.Limit()) { + LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); return &slot_samplers[NULL_SAMPLER_ID]; } const auto [descriptor, is_new] = graphics_sampler_table.Read(index); SamplerId& id = graphics_sampler_ids[index]; - [[unlikely]] if (is_new) { + if (is_new) { id = FindSampler(descriptor); } return &slot_samplers[id]; @@ -554,13 +562,13 @@ typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) { template <class P> typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) { - [[unlikely]] if (index > compute_sampler_table.Limit()) { - LOG_ERROR(HW_GPU, "Invalid sampler index={}", index); + if (index > compute_sampler_table.Limit()) { + LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); return &slot_samplers[NULL_SAMPLER_ID]; } const auto [descriptor, is_new] = compute_sampler_table.Read(index); SamplerId& id = compute_sampler_ids[index]; - [[unlikely]] if (is_new) { + if (is_new) { id = FindSampler(descriptor); } return &slot_samplers[id]; @@ -661,7 +669,7 @@ ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table, std::span<ImageViewId> cached_image_view_ids, u32 index) { if (index > table.Limit()) { - LOG_ERROR(HW_GPU, "Invalid image view index={}", index); + LOG_DEBUG(HW_GPU, "Invalid image view index={}", index); return NULL_IMAGE_VIEW_ID; } const auto [descriptor, is_new] = table.Read(index); @@ -968,9 +976,6 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); ConvertImage(unswizzled_data, image.info, mapped_span, copies); image.UploadMemory(staging, copies); - } else if (image.info.type == ImageType::Buffer) { - const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)}; - image.UploadMemory(staging, copies); } else { const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); image.UploadMemory(staging, copies); @@ -993,7 +998,12 @@ ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) { template <class P> ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) { const ImageInfo info(config); - const GPUVAddr image_gpu_addr = config.Address() - config.BaseLayer() * info.layer_stride; + if (info.type == ImageType::Buffer) { + const ImageViewInfo view_info(config, 0); + return slot_image_views.insert(runtime, info, view_info, config.Address()); + } + const u32 layer_offset = config.BaseLayer() * info.layer_stride; + const GPUVAddr image_gpu_addr = config.Address() - layer_offset; const ImageId image_id = FindOrInsertImage(info, image_gpu_addr); if (!image_id) { return NULL_IMAGE_VIEW_ID; @@ -1801,6 +1811,9 @@ void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modifi return; } const ImageViewBase& image_view = slot_image_views[image_view_id]; + if (image_view.IsBuffer()) { + return; + } PrepareImage(image_view.image_id, is_modification, invalidate); } diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index c1d14335e..1a9399455 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -154,6 +154,15 @@ union TextureHandle { }; static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); +[[nodiscard]] inline std::pair<u32, u32> TexturePair(u32 raw, bool via_header_index) { + if (via_header_index) { + return {raw, raw}; + } else { + const Tegra::Texture::TextureHandle handle{raw}; + return {handle.tic_id, via_header_index ? 
handle.tic_id : handle.tsc_id};
+    }
+}
+
 struct TICEntry {
     union {
         struct {
diff --git a/src/video_core/transform_feedback.cpp b/src/video_core/transform_feedback.cpp
new file mode 100644
index 000000000..ba26ac3f1
--- /dev/null
+++ b/src/video_core/transform_feedback.cpp
@@ -0,0 +1,99 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <vector>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "shader_recompiler/shader_info.h"
+#include "video_core/transform_feedback.h"
+
+namespace VideoCommon {
+
+std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
+    const TransformFeedbackState& state) {
+    static constexpr std::array VECTORS{
+        28,  // gl_Position
+        32,  // Generic 0
+        36,  // Generic 1
+        40,  // Generic 2
+        44,  // Generic 3
+        48,  // Generic 4
+        52,  // Generic 5
+        56,  // Generic 6
+        60,  // Generic 7
+        64,  // Generic 8
+        68,  // Generic 9
+        72,  // Generic 10
+        76,  // Generic 11
+        80,  // Generic 12
+        84,  // Generic 13
+        88,  // Generic 14
+        92,  // Generic 15
+        96,  // Generic 16
+        100, // Generic 17
+        104, // Generic 18
+        108, // Generic 19
+        112, // Generic 20
+        116, // Generic 21
+        120, // Generic 22
+        124, // Generic 23
+        128, // Generic 24
+        132, // Generic 25
+        136, // Generic 26
+        140, // Generic 27
+        144, // Generic 28
+        148, // Generic 29
+        152, // Generic 30
+        156, // Generic 31
+        160, // gl_FrontColor
+        164, // gl_FrontSecondaryColor
+        160, // gl_BackColor
+        164, // gl_BackSecondaryColor
+        192, // gl_TexCoord[0]
+        196, // gl_TexCoord[1]
+        200, // gl_TexCoord[2]
+        204, // gl_TexCoord[3]
+        208, // gl_TexCoord[4]
+        212, // gl_TexCoord[5]
+        216, // gl_TexCoord[6]
+        220, // gl_TexCoord[7]
+    };
+    std::vector<Shader::TransformFeedbackVarying> xfb(256);
+    for (size_t buffer = 0; buffer < state.layouts.size(); ++buffer) {
+        const auto& locations = state.varyings[buffer];
+        const auto& layout = state.layouts[buffer];
+        const u32 varying_count = layout.varying_count;
+        u32 highest = 0;
+        for (u32 offset = 0; offset < varying_count; ++offset) {
+            const u32 base_offset = offset;
+            const u8 location = locations[offset];
+
+            UNIMPLEMENTED_IF_MSG(layout.stream != 0, "Stream is not zero: {}", layout.stream);
+            Shader::TransformFeedbackVarying varying{
+                .buffer = static_cast<u32>(buffer),
+                .stride = layout.stride,
+                .offset = offset * 4,
+                .components = 1,
+            };
+            if (std::ranges::find(VECTORS, Common::AlignDown(location, 4)) != VECTORS.end()) {
+                UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB");
+
+                const u8 base_index = location / 4;
+                while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) {
+                    ++offset;
+                    ++varying.components;
+                }
+            }
+            xfb[location] = varying;
+            highest = std::max(highest, (base_offset + varying.components) * 4);
+        }
+        UNIMPLEMENTED_IF(highest != layout.stride);
+    }
+    return xfb;
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/transform_feedback.h b/src/video_core/transform_feedback.h
new file mode 100644
index 000000000..8f6946d65
--- /dev/null
+++ b/src/video_core/transform_feedback.h
@@ -0,0 +1,30 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+ +#pragma once + +#include <array> +#include <vector> + +#include "common/common_types.h" +#include "shader_recompiler/runtime_info.h" +#include "video_core/engines/maxwell_3d.h" + +namespace VideoCommon { + +struct TransformFeedbackState { + struct Layout { + u32 stream; + u32 varying_count; + u32 stride; + }; + std::array<Layout, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> layouts; + std::array<std::array<u8, 128>, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> + varyings; +}; + +std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings( + const TransformFeedbackState& state); + +} // namespace VideoCommon diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp index 758c038ba..fdd1a5081 100644 --- a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp +++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp @@ -73,12 +73,11 @@ NsightAftermathTracker::~NsightAftermathTracker() { } } -void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const { +void NsightAftermathTracker::SaveShader(std::span<const u32> spirv) const { if (!initialized) { return; } - - std::vector<u32> spirv_copy = spirv; + std::vector<u32> spirv_copy(spirv.begin(), spirv.end()); GFSDK_Aftermath_SpirvCode shader; shader.pData = spirv_copy.data(); shader.size = static_cast<u32>(spirv_copy.size() * 4); @@ -100,7 +99,7 @@ void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const { LOG_ERROR(Render_Vulkan, "Failed to dump SPIR-V module with hash={:016x}", hash.hash); return; } - if (file.Write(spirv) != spirv.size()) { + if (file.WriteSpan(spirv) != spirv.size()) { LOG_ERROR(Render_Vulkan, "Failed to write SPIR-V module with hash={:016x}", hash.hash); return; } diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.h b/src/video_core/vulkan_common/nsight_aftermath_tracker.h index 4fe2b14d9..eae1891dd 100644 --- a/src/video_core/vulkan_common/nsight_aftermath_tracker.h +++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.h @@ -6,6 +6,7 @@ #include <filesystem> #include <mutex> +#include <span> #include <string> #include <vector> @@ -33,7 +34,7 @@ public: NsightAftermathTracker(NsightAftermathTracker&&) = delete; NsightAftermathTracker& operator=(NsightAftermathTracker&&) = delete; - void SaveShader(const std::vector<u32>& spirv) const; + void SaveShader(std::span<const u32> spirv) const; private: #ifdef HAS_NSIGHT_AFTERMATH @@ -61,21 +62,21 @@ private: bool initialized = false; Common::DynamicLibrary dl; - PFN_GFSDK_Aftermath_DisableGpuCrashDumps GFSDK_Aftermath_DisableGpuCrashDumps; - PFN_GFSDK_Aftermath_EnableGpuCrashDumps GFSDK_Aftermath_EnableGpuCrashDumps; - PFN_GFSDK_Aftermath_GetShaderDebugInfoIdentifier GFSDK_Aftermath_GetShaderDebugInfoIdentifier; - PFN_GFSDK_Aftermath_GetShaderHashSpirv GFSDK_Aftermath_GetShaderHashSpirv; - PFN_GFSDK_Aftermath_GpuCrashDump_CreateDecoder GFSDK_Aftermath_GpuCrashDump_CreateDecoder; - PFN_GFSDK_Aftermath_GpuCrashDump_DestroyDecoder GFSDK_Aftermath_GpuCrashDump_DestroyDecoder; - PFN_GFSDK_Aftermath_GpuCrashDump_GenerateJSON GFSDK_Aftermath_GpuCrashDump_GenerateJSON; - PFN_GFSDK_Aftermath_GpuCrashDump_GetJSON GFSDK_Aftermath_GpuCrashDump_GetJSON; + PFN_GFSDK_Aftermath_DisableGpuCrashDumps GFSDK_Aftermath_DisableGpuCrashDumps{}; + PFN_GFSDK_Aftermath_EnableGpuCrashDumps GFSDK_Aftermath_EnableGpuCrashDumps{}; + PFN_GFSDK_Aftermath_GetShaderDebugInfoIdentifier 
GFSDK_Aftermath_GetShaderDebugInfoIdentifier{}; + PFN_GFSDK_Aftermath_GetShaderHashSpirv GFSDK_Aftermath_GetShaderHashSpirv{}; + PFN_GFSDK_Aftermath_GpuCrashDump_CreateDecoder GFSDK_Aftermath_GpuCrashDump_CreateDecoder{}; + PFN_GFSDK_Aftermath_GpuCrashDump_DestroyDecoder GFSDK_Aftermath_GpuCrashDump_DestroyDecoder{}; + PFN_GFSDK_Aftermath_GpuCrashDump_GenerateJSON GFSDK_Aftermath_GpuCrashDump_GenerateJSON{}; + PFN_GFSDK_Aftermath_GpuCrashDump_GetJSON GFSDK_Aftermath_GpuCrashDump_GetJSON{}; #endif }; #ifndef HAS_NSIGHT_AFTERMATH inline NsightAftermathTracker::NsightAftermathTracker() = default; inline NsightAftermathTracker::~NsightAftermathTracker() = default; -inline void NsightAftermathTracker::SaveShader(const std::vector<u32>&) const {} +inline void NsightAftermathTracker::SaveShader(std::span<const u32>) const {} #endif } // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index f214510da..44afdc1cd 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <algorithm> #include <bitset> #include <chrono> #include <optional> @@ -33,6 +34,12 @@ constexpr std::array DEPTH16_UNORM_STENCIL8_UINT{ }; } // namespace Alternatives +enum class NvidiaArchitecture { + AmpereOrNewer, + Turing, + VoltaOrOlder, +}; + constexpr std::array REQUIRED_EXTENSIONS{ VK_KHR_MAINTENANCE1_EXTENSION_NAME, VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME, @@ -43,11 +50,14 @@ constexpr std::array REQUIRED_EXTENSIONS{ VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, + VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME, + VK_KHR_VARIABLE_POINTERS_EXTENSION_NAME, VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, + VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME, #ifdef _WIN32 VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, #endif @@ -112,6 +122,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica VK_FORMAT_R16G16_SFLOAT, VK_FORMAT_R16G16_SINT, VK_FORMAT_R16_UNORM, + VK_FORMAT_R16_SNORM, VK_FORMAT_R16_UINT, VK_FORMAT_R8G8B8A8_SRGB, VK_FORMAT_R8G8_UNORM, @@ -191,15 +202,47 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica return format_properties; } +std::vector<std::string> GetSupportedExtensions(vk::PhysicalDevice physical) { + const std::vector extensions = physical.EnumerateDeviceExtensionProperties(); + std::vector<std::string> supported_extensions; + supported_extensions.reserve(extensions.size()); + for (const auto& extension : extensions) { + supported_extensions.emplace_back(extension.extensionName); + } + return supported_extensions; +} + +NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, + std::span<const std::string> exts) { + if (std::ranges::find(exts, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME) != exts.end()) { + VkPhysicalDeviceFragmentShadingRatePropertiesKHR shading_rate_props{}; + shading_rate_props.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR; + VkPhysicalDeviceProperties2KHR physical_properties{}; + physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; + 
physical_properties.pNext = &shading_rate_props; + physical.GetProperties2KHR(physical_properties); + if (shading_rate_props.primitiveFragmentShadingRateWithMultipleViewports) { + // Only Ampere and newer support this feature + return NvidiaArchitecture::AmpereOrNewer; + } + } + if (std::ranges::find(exts, VK_NV_SHADING_RATE_IMAGE_EXTENSION_NAME) != exts.end()) { + return NvidiaArchitecture::Turing; + } + return NvidiaArchitecture::VoltaOrOlder; +} } // Anonymous namespace Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface, const vk::InstanceDispatch& dld_) : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, - format_properties{GetFormatProperties(physical)} { + supported_extensions{GetSupportedExtensions(physical)}, + format_properties(GetFormatProperties(physical)) { CheckSuitability(surface != nullptr); SetupFamilies(surface); SetupFeatures(); + SetupProperties(); const auto queue_cis = GetDeviceQueueCreateInfos(); const std::vector extensions = LoadExtensions(surface != nullptr); @@ -214,16 +257,16 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .independentBlend = true, .geometryShader = true, .tessellationShader = true, - .sampleRateShading = false, - .dualSrcBlend = false, + .sampleRateShading = true, + .dualSrcBlend = true, .logicOp = false, .multiDrawIndirect = false, .drawIndirectFirstInstance = false, .depthClamp = true, .depthBiasClamp = true, - .fillModeNonSolid = false, - .depthBounds = false, - .wideLines = false, + .fillModeNonSolid = true, + .depthBounds = is_depth_bounds_supported, + .wideLines = true, .largePoints = true, .alphaToOne = false, .multiViewport = true, @@ -245,11 +288,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .shaderSampledImageArrayDynamicIndexing = false, .shaderStorageBufferArrayDynamicIndexing = false, .shaderStorageImageArrayDynamicIndexing = false, - .shaderClipDistance = false, - .shaderCullDistance = false, - .shaderFloat64 = false, - .shaderInt64 = false, - .shaderInt16 = false, + .shaderClipDistance = true, + .shaderCullDistance = true, + .shaderFloat64 = is_shader_float64_supported, + .shaderInt64 = is_shader_int64_supported, + .shaderInt16 = is_shader_int16_supported, .shaderResourceResidency = false, .shaderResourceMinLod = false, .sparseBinding = false, @@ -278,7 +321,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR, .pNext = nullptr, - .storageBuffer16BitAccess = false, + .storageBuffer16BitAccess = true, .uniformAndStorageBuffer16BitAccess = true, .storagePushConstant16 = false, .storageInputOutput16 = false, @@ -310,6 +353,21 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR }; SetNext(next, host_query_reset); + VkPhysicalDeviceVariablePointerFeaturesKHR variable_pointers{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES_KHR, + .pNext = nullptr, + .variablePointersStorageBuffer = VK_TRUE, + .variablePointers = VK_TRUE, + }; + SetNext(next, variable_pointers); + + VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT, + .pNext = nullptr, + .shaderDemoteToHelperInvocation = true, + }; + SetNext(next, demote); + VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8; if 
(is_float16_supported) { float16_int8 = { @@ -327,6 +385,14 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support viewport swizzles"); } + if (!nv_viewport_array2) { + LOG_INFO(Render_Vulkan, "Device doesn't support viewport masks"); + } + + if (!nv_geometry_shader_passthrough) { + LOG_INFO(Render_Vulkan, "Device doesn't support passthrough geometry shaders"); + } + VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout; if (khr_uniform_buffer_standard_layout) { std430_layout = { @@ -389,12 +455,83 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); } + VkPhysicalDeviceLineRasterizationFeaturesEXT line_raster; + if (ext_line_rasterization) { + line_raster = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT, + .pNext = nullptr, + .rectangularLines = VK_TRUE, + .bresenhamLines = VK_FALSE, + .smoothLines = VK_TRUE, + .stippledRectangularLines = VK_FALSE, + .stippledBresenhamLines = VK_FALSE, + .stippledSmoothLines = VK_FALSE, + }; + SetNext(next, line_raster); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support smooth lines"); + } + + if (!ext_conservative_rasterization) { + LOG_INFO(Render_Vulkan, "Device doesn't support conservative rasterization"); + } + + VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex; + if (ext_provoking_vertex) { + provoking_vertex = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT, + .pNext = nullptr, + .provokingVertexLast = VK_TRUE, + .transformFeedbackPreservesProvokingVertex = VK_TRUE, + }; + SetNext(next, provoking_vertex); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support provoking vertex last"); + } + + VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT vertex_input_dynamic; + if (ext_vertex_input_dynamic_state) { + vertex_input_dynamic = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT, + .pNext = nullptr, + .vertexInputDynamicState = VK_TRUE, + }; + SetNext(next, vertex_input_dynamic); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support vertex input dynamic state"); + } + + VkPhysicalDeviceShaderAtomicInt64FeaturesKHR atomic_int64; + if (ext_shader_atomic_int64) { + atomic_int64 = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR, + .pNext = nullptr, + .shaderBufferInt64Atomics = VK_TRUE, + .shaderSharedInt64Atomics = VK_TRUE, + }; + SetNext(next, atomic_int64); + } + + VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR workgroup_layout; + if (khr_workgroup_memory_explicit_layout) { + workgroup_layout = { + .sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR, + .pNext = nullptr, + .workgroupMemoryExplicitLayout = VK_TRUE, + .workgroupMemoryExplicitLayoutScalarBlockLayout = VK_TRUE, + .workgroupMemoryExplicitLayout8BitAccess = VK_TRUE, + .workgroupMemoryExplicitLayout16BitAccess = VK_TRUE, + }; + SetNext(next, workgroup_layout); + } + if (!ext_depth_range_unrestricted) { LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); } VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv; - if (nv_device_diagnostics_config) { + if (Settings::values.enable_nsight_aftermath && nv_device_diagnostics_config) { nsight_aftermath_tracker = std::make_unique<NsightAftermathTracker>(); diagnostics_nv = { @@ -412,11 +549,33 @@ Device::Device(VkInstance 
instance_, vk::PhysicalDevice physical_, VkSurfaceKHR CollectTelemetryParameters(); CollectToolingInfo(); + if (driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) { + const auto arch = GetNvidiaArchitecture(physical, supported_extensions); + switch (arch) { + case NvidiaArchitecture::AmpereOrNewer: + LOG_WARNING(Render_Vulkan, "Blacklisting Ampere devices from float16 math"); + is_float16_supported = false; + break; + case NvidiaArchitecture::Turing: + break; + case NvidiaArchitecture::VoltaOrOlder: + LOG_WARNING(Render_Vulkan, "Blacklisting Volta and older from VK_KHR_push_descriptor"); + khr_push_descriptor = false; + break; + } + } if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) { - LOG_WARNING( - Render_Vulkan, - "Blacklisting RADV for VK_EXT_extended_dynamic state, likely due to a bug in yuzu"); - ext_extended_dynamic_state = false; + // Mask driver version variant + const u32 version = (properties.driverVersion << 3) >> 3; + if (version < VK_MAKE_API_VERSION(0, 21, 2, 0)) { + LOG_WARNING(Render_Vulkan, + "RADV versions older than 21.2 have broken VK_EXT_extended_dynamic_state"); + ext_extended_dynamic_state = false; + } + } + if (ext_vertex_input_dynamic_state && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { + LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state"); + ext_vertex_input_dynamic_state = false; } if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being. @@ -426,8 +585,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR graphics_queue = logical.GetQueue(graphics_family); present_queue = logical.GetQueue(present_family); - - use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue(); } Device::~Device() = default; @@ -471,7 +628,7 @@ void Device::ReportLoss() const { std::this_thread::sleep_for(std::chrono::seconds{15}); } -void Device::SaveShader(const std::vector<u32>& spirv) const { +void Device::SaveShader(std::span<const u32> spirv) const { if (nsight_aftermath_tracker) { nsight_aftermath_tracker->SaveShader(spirv); } @@ -597,10 +754,20 @@ void Device::CheckSuitability(bool requires_swapchain) const { throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); } } + VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{}; + demote.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT; + demote.pNext = nullptr; + + VkPhysicalDeviceVariablePointerFeaturesKHR variable_pointers{}; + variable_pointers.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES_KHR; + variable_pointers.pNext = &demote; + VkPhysicalDeviceRobustness2FeaturesEXT robustness2{}; robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; + robustness2.pNext = &variable_pointers; - VkPhysicalDeviceFeatures2 features2{}; + VkPhysicalDeviceFeatures2KHR features2{}; features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; features2.pNext = &robustness2; @@ -610,7 +777,6 @@ void Device::CheckSuitability(bool requires_swapchain) const { const std::array feature_report{ std::make_pair(features.robustBufferAccess, "robustBufferAccess"), std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), - std::make_pair(features.robustBufferAccess, "robustBufferAccess"), std::make_pair(features.imageCubeArray, "imageCubeArray"), std::make_pair(features.independentBlend, 
"independentBlend"), std::make_pair(features.depthClamp, "depthClamp"), @@ -618,13 +784,23 @@ void Device::CheckSuitability(bool requires_swapchain) const { std::make_pair(features.largePoints, "largePoints"), std::make_pair(features.multiViewport, "multiViewport"), std::make_pair(features.depthBiasClamp, "depthBiasClamp"), + std::make_pair(features.fillModeNonSolid, "fillModeNonSolid"), + std::make_pair(features.wideLines, "wideLines"), std::make_pair(features.geometryShader, "geometryShader"), std::make_pair(features.tessellationShader, "tessellationShader"), + std::make_pair(features.sampleRateShading, "sampleRateShading"), + std::make_pair(features.dualSrcBlend, "dualSrcBlend"), std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), std::make_pair(features.shaderStorageImageWriteWithoutFormat, "shaderStorageImageWriteWithoutFormat"), + std::make_pair(features.shaderClipDistance, "shaderClipDistance"), + std::make_pair(features.shaderCullDistance, "shaderCullDistance"), + std::make_pair(demote.shaderDemoteToHelperInvocation, "shaderDemoteToHelperInvocation"), + std::make_pair(variable_pointers.variablePointers, "variablePointers"), + std::make_pair(variable_pointers.variablePointersStorageBuffer, + "variablePointersStorageBuffer"), std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"), std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"), std::make_pair(robustness2.nullDescriptor, "nullDescriptor"), @@ -647,14 +823,19 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { } bool has_khr_shader_float16_int8{}; + bool has_khr_workgroup_memory_explicit_layout{}; bool has_ext_subgroup_size_control{}; bool has_ext_transform_feedback{}; bool has_ext_custom_border_color{}; bool has_ext_extended_dynamic_state{}; - for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { + bool has_ext_shader_atomic_int64{}; + bool has_ext_provoking_vertex{}; + bool has_ext_vertex_input_dynamic_state{}; + bool has_ext_line_rasterization{}; + for (const std::string& extension : supported_extensions) { const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name, bool push) { - if (extension.extensionName != std::string_view(name)) { + if (extension != name) { return; } if (push) { @@ -665,8 +846,13 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { } }; test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true); + test(nv_viewport_array2, VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME, true); + test(nv_geometry_shader_passthrough, VK_NV_GEOMETRY_SHADER_PASSTHROUGH_EXTENSION_NAME, + true); test(khr_uniform_buffer_standard_layout, VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); + test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true); + test(khr_push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, true); test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); @@ -675,16 +861,25 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { true); test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true); test(ext_shader_stencil_export, 
VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true); + test(ext_conservative_rasterization, VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME, + true); test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); - if (Settings::values.renderer_debug) { + test(has_ext_provoking_vertex, VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME, false); + test(has_ext_vertex_input_dynamic_state, VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME, + false); + test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false); + test(has_khr_workgroup_memory_explicit_layout, + VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); + test(has_ext_line_rasterization, VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, false); + if (Settings::values.enable_nsight_aftermath) { test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, true); } } - VkPhysicalDeviceFeatures2KHR features; + VkPhysicalDeviceFeatures2KHR features{}; features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; VkPhysicalDeviceProperties2KHR physical_properties; @@ -722,10 +917,49 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { subgroup_properties.maxSubgroupSize >= GuestWarpSize) { extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME); guest_warp_stages = subgroup_properties.requiredSubgroupSizeStages; + ext_subgroup_size_control = true; } } else { is_warp_potentially_bigger = true; } + if (has_ext_provoking_vertex) { + VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex; + provoking_vertex.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT; + provoking_vertex.pNext = nullptr; + features.pNext = &provoking_vertex; + physical.GetFeatures2KHR(features); + + if (provoking_vertex.provokingVertexLast && + provoking_vertex.transformFeedbackPreservesProvokingVertex) { + extensions.push_back(VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME); + ext_provoking_vertex = true; + } + } + if (has_ext_vertex_input_dynamic_state) { + VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT vertex_input; + vertex_input.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT; + vertex_input.pNext = nullptr; + features.pNext = &vertex_input; + physical.GetFeatures2KHR(features); + + if (vertex_input.vertexInputDynamicState) { + extensions.push_back(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); + ext_vertex_input_dynamic_state = true; + } + } + if (has_ext_shader_atomic_int64) { + VkPhysicalDeviceShaderAtomicInt64Features atomic_int64; + atomic_int64.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; + atomic_int64.pNext = nullptr; + features.pNext = &atomic_int64; + physical.GetFeatures2KHR(features); + + if (atomic_int64.shaderBufferInt64Atomics && atomic_int64.shaderSharedInt64Atomics) { + extensions.push_back(VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME); + ext_shader_atomic_int64 = true; + } + } if (has_ext_transform_feedback) { VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features; tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; @@ -760,17 +994,55 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { } } if (has_ext_extended_dynamic_state) { - 
VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state; - dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; - dynamic_state.pNext = nullptr; - features.pNext = &dynamic_state; + VkPhysicalDeviceExtendedDynamicStateFeaturesEXT extended_dynamic_state; + extended_dynamic_state.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; + extended_dynamic_state.pNext = nullptr; + features.pNext = &extended_dynamic_state; physical.GetFeatures2KHR(features); - if (dynamic_state.extendedDynamicState) { + if (extended_dynamic_state.extendedDynamicState) { extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); ext_extended_dynamic_state = true; } } + if (has_ext_line_rasterization) { + VkPhysicalDeviceLineRasterizationFeaturesEXT line_raster; + line_raster.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT; + line_raster.pNext = nullptr; + features.pNext = &line_raster; + physical.GetFeatures2KHR(features); + if (line_raster.rectangularLines && line_raster.smoothLines) { + extensions.push_back(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME); + ext_line_rasterization = true; + } + } + if (has_khr_workgroup_memory_explicit_layout) { + VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR layout; + layout.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR; + layout.pNext = nullptr; + features.pNext = &layout; + physical.GetFeatures2KHR(features); + + if (layout.workgroupMemoryExplicitLayout && + layout.workgroupMemoryExplicitLayout8BitAccess && + layout.workgroupMemoryExplicitLayout16BitAccess && + layout.workgroupMemoryExplicitLayoutScalarBlockLayout) { + extensions.push_back(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME); + khr_workgroup_memory_explicit_layout = true; + } + } + if (khr_push_descriptor) { + VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor; + push_descriptor.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR; + push_descriptor.pNext = nullptr; + + physical_properties.pNext = &push_descriptor; + physical.GetProperties2KHR(physical_properties); + + max_push_descriptors = push_descriptor.maxPushDescriptors; + } return extensions; } @@ -806,11 +1078,25 @@ void Device::SetupFamilies(VkSurfaceKHR surface) { } void Device::SetupFeatures() { - const auto supported_features{physical.GetFeatures()}; - is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; - is_shader_storage_image_multisample = supported_features.shaderStorageImageMultisample; + const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; + is_depth_bounds_supported = features.depthBounds; + is_formatless_image_load_supported = features.shaderStorageImageReadWithoutFormat; + is_shader_float64_supported = features.shaderFloat64; + is_shader_int64_supported = features.shaderInt64; + is_shader_int16_supported = features.shaderInt16; + is_shader_storage_image_multisample = features.shaderStorageImageMultisample; is_blit_depth_stencil_supported = TestDepthStencilBlits(); - is_optimal_astc_supported = IsOptimalAstcSupported(supported_features); + is_optimal_astc_supported = IsOptimalAstcSupported(features); +} + +void Device::SetupProperties() { + float_controls.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR; + + VkPhysicalDeviceProperties2KHR properties2{}; + properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; + properties2.pNext = &float_controls; + + 
physical.GetProperties2KHR(properties2); } void Device::CollectTelemetryParameters() { @@ -832,12 +1118,6 @@ void Device::CollectTelemetryParameters() { driver_id = driver.driverID; vendor_name = driver.driverName; - - const std::vector extensions = physical.EnumerateDeviceExtensionProperties(); - reported_extensions.reserve(std::size(extensions)); - for (const auto& extension : extensions) { - reported_extensions.emplace_back(extension.extensionName); - } } void Device::CollectPhysicalMemoryInfo() { diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 96c0f8c60..df394e384 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -4,6 +4,7 @@ #pragma once +#include <span> #include <string> #include <string_view> #include <unordered_map> @@ -43,7 +44,7 @@ public: void ReportLoss() const; /// Reports a shader to Nsight Aftermath. - void SaveShader(const std::vector<u32>& spirv) const; + void SaveShader(std::span<const u32> spirv) const; /// Returns the name of the VkDriverId reported from Vulkan. std::string GetDriverName() const; @@ -128,6 +129,11 @@ public: return properties.limits.maxComputeSharedMemorySize; } + /// Returns float control properties of the device. + const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const { + return float_controls; + } + /// Returns true if ASTC is natively supported. bool IsOptimalAstcSupported() const { return is_optimal_astc_supported; @@ -148,11 +154,31 @@ public: return guest_warp_stages & stage; } + /// Returns the maximum number of push descriptors. + u32 MaxPushDescriptors() const { + return max_push_descriptors; + } + /// Returns true if formatless image load is supported. bool IsFormatlessImageLoadSupported() const { return is_formatless_image_load_supported; } + /// Returns true if shader int64 is supported. + bool IsShaderInt64Supported() const { + return is_shader_int64_supported; + } + + /// Returns true if shader int16 is supported. + bool IsShaderInt16Supported() const { + return is_shader_int16_supported; + } + + // Returns true if depth bounds is supported. + bool IsDepthBoundsSupported() const { + return is_depth_bounds_supported; + } + /// Returns true when blitting from and to depth stencil images is supported. bool IsBlitDepthStencilSupported() const { return is_blit_depth_stencil_supported; @@ -163,11 +189,36 @@ public: return nv_viewport_swizzle; } - /// Returns true if the device supports VK_EXT_scalar_block_layout. + /// Returns true if the device supports VK_NV_viewport_array2. + bool IsNvViewportArray2Supported() const { + return nv_viewport_array2; + } + + /// Returns true if the device supports VK_NV_geometry_shader_passthrough. + bool IsNvGeometryShaderPassthroughSupported() const { + return nv_geometry_shader_passthrough; + } + + /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout. bool IsKhrUniformBufferStandardLayoutSupported() const { return khr_uniform_buffer_standard_layout; } + /// Returns true if the device supports VK_KHR_spirv_1_4. + bool IsKhrSpirv1_4Supported() const { + return khr_spirv_1_4; + } + + /// Returns true if the device supports VK_KHR_push_descriptor. + bool IsKhrPushDescriptorSupported() const { + return khr_push_descriptor; + } + + /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout. 
+ bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const { + return khr_workgroup_memory_explicit_layout; + } + /// Returns true if the device supports VK_EXT_index_type_uint8. bool IsExtIndexTypeUint8Supported() const { return ext_index_type_uint8; @@ -188,6 +239,11 @@ public: return ext_shader_viewport_index_layer; } + /// Returns true if the device supports VK_EXT_subgroup_size_control. + bool IsExtSubgroupSizeControlSupported() const { + return ext_subgroup_size_control; + } + /// Returns true if the device supports VK_EXT_transform_feedback. bool IsExtTransformFeedbackSupported() const { return ext_transform_feedback; @@ -203,11 +259,36 @@ public: return ext_extended_dynamic_state; } + /// Returns true if the device supports VK_EXT_line_rasterization. + bool IsExtLineRasterizationSupported() const { + return ext_line_rasterization; + } + + /// Returns true if the device supports VK_EXT_vertex_input_dynamic_state. + bool IsExtVertexInputDynamicStateSupported() const { + return ext_vertex_input_dynamic_state; + } + /// Returns true if the device supports VK_EXT_shader_stencil_export. bool IsExtShaderStencilExportSupported() const { return ext_shader_stencil_export; } + /// Returns true if the device supports VK_EXT_conservative_rasterization. + bool IsExtConservativeRasterizationSupported() const { + return ext_conservative_rasterization; + } + + /// Returns true if the device supports VK_EXT_provoking_vertex. + bool IsExtProvokingVertexSupported() const { + return ext_provoking_vertex; + } + + /// Returns true if the device supports VK_KHR_shader_atomic_int64. + bool IsExtShaderAtomicInt64Supported() const { + return ext_shader_atomic_int64; + } + /// Returns true when a known debugging tool is attached. bool HasDebuggingToolAttached() const { return has_renderdoc || has_nsight_graphics; @@ -220,12 +301,7 @@ public: /// Returns the list of available extensions. const std::vector<std::string>& GetAvailableExtensions() const { - return reported_extensions; - } - - /// Returns true if the setting for async shader compilation is enabled. - bool UseAsynchronousShaders() const { - return use_asynchronous_shaders; + return supported_extensions; } u64 GetDeviceLocalMemory() const { @@ -245,6 +321,9 @@ private: /// Sets up device features. void SetupFeatures(); + /// Sets up device properties. + void SetupProperties(); + /// Collects telemetry information from the device. void CollectTelemetryParameters(); @@ -267,46 +346,60 @@ private: bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, FormatType format_type) const; - VkInstance instance; ///< Vulkan instance. - vk::DeviceDispatch dld; ///< Device function pointers. - vk::PhysicalDevice physical; ///< Physical device. - VkPhysicalDeviceProperties properties; ///< Device properties. - vk::Device logical; ///< Logical device. - vk::Queue graphics_queue; ///< Main graphics queue. - vk::Queue present_queue; ///< Main present queue. - u32 instance_version{}; ///< Vulkan onstance version. + VkInstance instance; ///< Vulkan instance. + vk::DeviceDispatch dld; ///< Device function pointers. + vk::PhysicalDevice physical; ///< Physical device. + VkPhysicalDeviceProperties properties; ///< Device properties. + VkPhysicalDeviceFloatControlsPropertiesKHR float_controls{}; ///< Float control properties. + vk::Device logical; ///< Logical device. + vk::Queue graphics_queue; ///< Main graphics queue. + vk::Queue present_queue; ///< Main present queue. + u32 instance_version{}; ///< Vulkan onstance version. 
u32 graphics_family{}; ///< Main graphics queue family index. u32 present_family{}; ///< Main present queue family index. VkDriverIdKHR driver_id{}; ///< Driver ID. VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. u64 device_access_memory{}; ///< Total size of device local memory in bytes. + u32 max_push_descriptors{}; ///< Maximum number of push descriptors bool is_optimal_astc_supported{}; ///< Support for native ASTC. bool is_float16_supported{}; ///< Support for float16 arithmetics. bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. + bool is_depth_bounds_supported{}; ///< Support for depth bounds. + bool is_shader_float64_supported{}; ///< Support for float64. + bool is_shader_int64_supported{}; ///< Support for int64. + bool is_shader_int16_supported{}; ///< Support for int16. bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. - bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. - bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. - bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. - bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. - bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. - bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. - bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. - bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. - bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. - bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. - bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. - bool has_renderdoc{}; ///< Has RenderDoc attached - bool has_nsight_graphics{}; ///< Has Nsight Graphics attached - - // Asynchronous Graphics Pipeline setting - bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline + bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2. + bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough. + bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts. + bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4. + bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. + bool khr_push_descriptor{}; ///< Support for VK_KHR_push_descritor. + bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. + bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. + bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. + bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. + bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. + bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control. + bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. + bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. 
+ bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. + bool ext_line_rasterization{}; ///< Support for VK_EXT_line_rasterization. + bool ext_vertex_input_dynamic_state{}; ///< Support for VK_EXT_vertex_input_dynamic_state. + bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. + bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. + bool ext_conservative_rasterization{}; ///< Support for VK_EXT_conservative_rasterization. + bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex. + bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. + bool has_renderdoc{}; ///< Has RenderDoc attached + bool has_nsight_graphics{}; ///< Has Nsight Graphics attached // Telemetry parameters - std::string vendor_name; ///< Device's driver name. - std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions. + std::string vendor_name; ///< Device's driver name. + std::vector<std::string> supported_extensions; ///< Reported Vulkan extensions. /// Format properties dictionary. std::unordered_map<VkFormat, VkFormatProperties> format_properties; diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 2aa0ffbe6..bbf0fccae 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -103,6 +103,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdFillBuffer); X(vkCmdPipelineBarrier); X(vkCmdPushConstants); + X(vkCmdPushDescriptorSetWithTemplateKHR); X(vkCmdSetBlendConstants); X(vkCmdSetDepthBias); X(vkCmdSetDepthBounds); @@ -120,9 +121,11 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdSetDepthTestEnableEXT); X(vkCmdSetDepthWriteEnableEXT); X(vkCmdSetFrontFaceEXT); + X(vkCmdSetLineWidth); X(vkCmdSetPrimitiveTopologyEXT); X(vkCmdSetStencilOpEXT); X(vkCmdSetStencilTestEnableEXT); + X(vkCmdSetVertexInputEXT); X(vkCmdResolveImage); X(vkCreateBuffer); X(vkCreateBufferView); @@ -311,8 +314,6 @@ const char* ToString(VkResult result) noexcept { return "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT"; case VkResult::VK_ERROR_UNKNOWN: return "VK_ERROR_UNKNOWN"; - case VkResult::VK_ERROR_INCOMPATIBLE_VERSION_KHR: - return "VK_ERROR_INCOMPATIBLE_VERSION_KHR"; case VkResult::VK_THREAD_IDLE_KHR: return "VK_THREAD_IDLE_KHR"; case VkResult::VK_THREAD_DONE_KHR: diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 3e36d356a..d76bb4324 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -193,15 +193,16 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkBeginCommandBuffer vkBeginCommandBuffer{}; PFN_vkBindBufferMemory vkBindBufferMemory{}; PFN_vkBindImageMemory vkBindImageMemory{}; + PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{}; PFN_vkCmdBeginQuery vkCmdBeginQuery{}; PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass{}; PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT{}; - PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{}; PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets{}; PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer{}; PFN_vkCmdBindPipeline vkCmdBindPipeline{}; PFN_vkCmdBindTransformFeedbackBuffersEXT vkCmdBindTransformFeedbackBuffersEXT{}; PFN_vkCmdBindVertexBuffers vkCmdBindVertexBuffers{}; + PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT{}; 
PFN_vkCmdBlitImage vkCmdBlitImage{}; PFN_vkCmdClearAttachments vkCmdClearAttachments{}; PFN_vkCmdCopyBuffer vkCmdCopyBuffer{}; @@ -211,34 +212,36 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkCmdDispatch vkCmdDispatch{}; PFN_vkCmdDraw vkCmdDraw{}; PFN_vkCmdDrawIndexed vkCmdDrawIndexed{}; + PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{}; PFN_vkCmdEndQuery vkCmdEndQuery{}; PFN_vkCmdEndRenderPass vkCmdEndRenderPass{}; PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT{}; - PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{}; PFN_vkCmdFillBuffer vkCmdFillBuffer{}; PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier{}; PFN_vkCmdPushConstants vkCmdPushConstants{}; + PFN_vkCmdPushDescriptorSetWithTemplateKHR vkCmdPushDescriptorSetWithTemplateKHR{}; + PFN_vkCmdResolveImage vkCmdResolveImage{}; PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants{}; + PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT{}; PFN_vkCmdSetDepthBias vkCmdSetDepthBias{}; PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds{}; - PFN_vkCmdSetEvent vkCmdSetEvent{}; - PFN_vkCmdSetScissor vkCmdSetScissor{}; - PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{}; - PFN_vkCmdSetStencilReference vkCmdSetStencilReference{}; - PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask{}; - PFN_vkCmdSetViewport vkCmdSetViewport{}; - PFN_vkCmdWaitEvents vkCmdWaitEvents{}; - PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT{}; - PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT{}; PFN_vkCmdSetDepthBoundsTestEnableEXT vkCmdSetDepthBoundsTestEnableEXT{}; PFN_vkCmdSetDepthCompareOpEXT vkCmdSetDepthCompareOpEXT{}; PFN_vkCmdSetDepthTestEnableEXT vkCmdSetDepthTestEnableEXT{}; PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT{}; + PFN_vkCmdSetEvent vkCmdSetEvent{}; PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT{}; + PFN_vkCmdSetLineWidth vkCmdSetLineWidth{}; PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT{}; + PFN_vkCmdSetScissor vkCmdSetScissor{}; + PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{}; PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT{}; + PFN_vkCmdSetStencilReference vkCmdSetStencilReference{}; PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT{}; - PFN_vkCmdResolveImage vkCmdResolveImage{}; + PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask{}; + PFN_vkCmdSetVertexInputEXT vkCmdSetVertexInputEXT{}; + PFN_vkCmdSetViewport vkCmdSetViewport{}; + PFN_vkCmdWaitEvents vkCmdWaitEvents{}; PFN_vkCreateBuffer vkCreateBuffer{}; PFN_vkCreateBufferView vkCreateBufferView{}; PFN_vkCreateCommandPool vkCreateCommandPool{}; @@ -989,6 +992,12 @@ public: dynamic_offsets.size(), dynamic_offsets.data()); } + void PushDescriptorSetWithTemplateKHR(VkDescriptorUpdateTemplateKHR update_template, + VkPipelineLayout layout, u32 set, + const void* data) const noexcept { + dld->vkCmdPushDescriptorSetWithTemplateKHR(handle, update_template, layout, set, data); + } + void BindPipeline(VkPipelineBindPoint bind_point, VkPipeline pipeline) const noexcept { dld->vkCmdBindPipeline(handle, bind_point, pipeline); } @@ -1190,6 +1199,10 @@ public: dld->vkCmdSetFrontFaceEXT(handle, front_face); } + void SetLineWidth(float line_width) const noexcept { + dld->vkCmdSetLineWidth(handle, line_width); + } + void SetPrimitiveTopologyEXT(VkPrimitiveTopology primitive_topology) const noexcept { dld->vkCmdSetPrimitiveTopologyEXT(handle, primitive_topology); } @@ -1203,6 +1216,13 @@ public: dld->vkCmdSetStencilTestEnableEXT(handle, enable ? 
VK_TRUE : VK_FALSE); } + void SetVertexInputEXT( + vk::Span<VkVertexInputBindingDescription2EXT> bindings, + vk::Span<VkVertexInputAttributeDescription2EXT> attributes) const noexcept { + dld->vkCmdSetVertexInputEXT(handle, bindings.size(), bindings.data(), attributes.size(), + attributes.data()); + } + void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers, const VkDeviceSize* offsets, const VkDeviceSize* sizes) const noexcept { diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index d72ca5acc..25b658b2a 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp @@ -64,12 +64,13 @@ void EmuThread::run() { emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0); - system.Renderer().ReadRasterizer()->LoadDiskResources( - system.CurrentProcess()->GetTitleID(), stop_token, - [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) { - emit LoadProgress(stage, value, total); - }); - + if (Settings::values.use_disk_shader_cache.GetValue()) { + system.Renderer().ReadRasterizer()->LoadDiskResources( + system.CurrentProcess()->GetTitleID(), stop_token, + [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) { + emit LoadProgress(stage, value, total); + }); + } emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0); gpu.ReleaseContext(); diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index a5e032959..52b3ed02e 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -814,7 +814,7 @@ void Config::ReadRendererValues() { ReadGlobalSetting(Settings::values.use_nvdec_emulation); ReadGlobalSetting(Settings::values.accelerate_astc); ReadGlobalSetting(Settings::values.use_vsync); - ReadGlobalSetting(Settings::values.use_assembly_shaders); + ReadGlobalSetting(Settings::values.shader_backend); ReadGlobalSetting(Settings::values.use_asynchronous_shaders); ReadGlobalSetting(Settings::values.use_fast_gpu_time); ReadGlobalSetting(Settings::values.use_caches_gc); @@ -824,6 +824,8 @@ void Config::ReadRendererValues() { if (global) { ReadBasicSetting(Settings::values.renderer_debug); + ReadBasicSetting(Settings::values.enable_nsight_aftermath); + ReadBasicSetting(Settings::values.disable_shader_loop_safety_checks); } qt_config->endGroup(); @@ -1343,7 +1345,10 @@ void Config::SaveRendererValues() { WriteGlobalSetting(Settings::values.use_nvdec_emulation); WriteGlobalSetting(Settings::values.accelerate_astc); WriteGlobalSetting(Settings::values.use_vsync); - WriteGlobalSetting(Settings::values.use_assembly_shaders); + WriteSetting(QString::fromStdString(Settings::values.shader_backend.GetLabel()), + static_cast<u32>(Settings::values.shader_backend.GetValue(global)), + static_cast<u32>(Settings::values.shader_backend.GetDefault()), + Settings::values.shader_backend.UsingGlobal()); WriteGlobalSetting(Settings::values.use_asynchronous_shaders); WriteGlobalSetting(Settings::values.use_fast_gpu_time); WriteGlobalSetting(Settings::values.use_caches_gc); @@ -1353,6 +1358,8 @@ void Config::SaveRendererValues() { if (global) { WriteBasicSetting(Settings::values.renderer_debug); + WriteBasicSetting(Settings::values.enable_nsight_aftermath); + WriteBasicSetting(Settings::values.disable_shader_loop_safety_checks); } qt_config->endGroup(); diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h index 96f9b6de1..4bbb9f1cd 100644 --- a/src/yuzu/configuration/config.h +++ b/src/yuzu/configuration/config.h @@ -180,5 +180,6 @@ 
private: // These metatype declarations cannot be in common/settings.h because core is devoid of QT Q_DECLARE_METATYPE(Settings::CPUAccuracy); -Q_DECLARE_METATYPE(Settings::RendererBackend); Q_DECLARE_METATYPE(Settings::GPUAccuracy); +Q_DECLARE_METATYPE(Settings::RendererBackend); +Q_DECLARE_METATYPE(Settings::ShaderBackend); diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp index 8fceb3878..f7e29dbd7 100644 --- a/src/yuzu/configuration/configure_debug.cpp +++ b/src/yuzu/configuration/configure_debug.cpp @@ -45,8 +45,13 @@ void ConfigureDebug::SetConfiguration() { ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug.GetValue()); ui->enable_cpu_debugging->setEnabled(runtime_lock); ui->enable_cpu_debugging->setChecked(Settings::values.cpu_debug_mode.GetValue()); + ui->enable_nsight_aftermath->setEnabled(runtime_lock); + ui->enable_nsight_aftermath->setChecked(Settings::values.enable_nsight_aftermath.GetValue()); ui->disable_macro_jit->setEnabled(runtime_lock); ui->disable_macro_jit->setChecked(Settings::values.disable_macro_jit.GetValue()); + ui->disable_loop_safety_checks->setEnabled(runtime_lock); + ui->disable_loop_safety_checks->setChecked( + Settings::values.disable_shader_loop_safety_checks.GetValue()); ui->extended_logging->setChecked(Settings::values.extended_logging.GetValue()); } @@ -61,6 +66,9 @@ void ConfigureDebug::ApplyConfiguration() { Settings::values.use_auto_stub = ui->use_auto_stub->isChecked(); Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked(); Settings::values.cpu_debug_mode = ui->enable_cpu_debugging->isChecked(); + Settings::values.enable_nsight_aftermath = ui->enable_nsight_aftermath->isChecked(); + Settings::values.disable_shader_loop_safety_checks = + ui->disable_loop_safety_checks->isChecked(); Settings::values.disable_macro_jit = ui->disable_macro_jit->isChecked(); Settings::values.extended_logging = ui->extended_logging->isChecked(); Debugger::ToggleConsole(); diff --git a/src/yuzu/configuration/configure_debug.ui b/src/yuzu/configuration/configure_debug.ui index 1260ad6f0..c8baf2921 100644 --- a/src/yuzu/configuration/configure_debug.ui +++ b/src/yuzu/configuration/configure_debug.ui @@ -126,6 +126,16 @@ </widget> </item> <item> + <widget class="QCheckBox" name="enable_nsight_aftermath"> + <property name="toolTip"> + <string>When checked, it enables Nsight Aftermath crash dumps</string> + </property> + <property name="text"> + <string>Enable Nsight Aftermath</string> + </property> + </widget> + </item> + <item> <widget class="QCheckBox" name="disable_macro_jit"> <property name="enabled"> <bool>true</bool> @@ -138,6 +148,16 @@ </property> </widget> </item> + <item> + <widget class="QCheckBox" name="disable_loop_safety_checks"> + <property name="toolTip"> + <string>When checked, it executes shaders without loop logic changes</string> + </property> + <property name="text"> + <string>Disable Loop safety checks</string> + </property> + </widget> + </item> </layout> </widget> </item> @@ -252,11 +272,17 @@ <tabstops> <tabstop>log_filter_edit</tabstop> <tabstop>toggle_console</tabstop> + <tabstop>extended_logging</tabstop> <tabstop>open_log_button</tabstop> <tabstop>homebrew_args_edit</tabstop> <tabstop>enable_graphics_debugging</tabstop> + <tabstop>enable_nsight_aftermath</tabstop> + <tabstop>disable_macro_jit</tabstop> + <tabstop>disable_loop_safety_checks</tabstop> <tabstop>reporting_services</tabstop> <tabstop>quest_flag</tabstop> + <tabstop>use_debug_asserts</tabstop> 
+ <tabstop>use_auto_stub</tabstop> </tabstops> <resources/> <connections/> diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index 4d5b4c0e6..fef211707 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -26,19 +26,29 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent) ui->setupUi(this); + for (const auto& device : vulkan_devices) { + ui->device->addItem(device); + } + + ui->backend->addItem(QStringLiteral("GLSL")); + ui->backend->addItem(tr("GLASM (NVIDIA Only)")); + ui->backend->addItem(QStringLiteral("SPIR-V (Experimental, Mesa Only)")); + SetupPerGameUI(); SetConfiguration(); connect(ui->api, qOverload<int>(&QComboBox::currentIndexChanged), this, [this] { - UpdateDeviceComboBox(); + UpdateAPILayout(); if (!Settings::IsConfiguringGlobal()) { ConfigurationShared::SetHighlight( - ui->api_layout, ui->api->currentIndex() != ConfigurationShared::USE_GLOBAL_INDEX); + ui->api_widget, ui->api->currentIndex() != ConfigurationShared::USE_GLOBAL_INDEX); } }); connect(ui->device, qOverload<int>(&QComboBox::activated), this, [this](int device) { UpdateDeviceSelection(device); }); + connect(ui->backend, qOverload<int>(&QComboBox::activated), this, + [this](int backend) { UpdateShaderBackendSelection(backend); }); connect(ui->bg_button, &QPushButton::clicked, this, [this] { const QColor new_bg_color = QColorDialog::getColor(bg_color); @@ -61,12 +71,21 @@ void ConfigureGraphics::UpdateDeviceSelection(int device) { } } +void ConfigureGraphics::UpdateShaderBackendSelection(int backend) { + if (backend == -1) { + return; + } + if (GetCurrentGraphicsBackend() == Settings::RendererBackend::OpenGL) { + shader_backend = static_cast<Settings::ShaderBackend>(backend); + } +} + ConfigureGraphics::~ConfigureGraphics() = default; void ConfigureGraphics::SetConfiguration() { const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); - ui->api->setEnabled(runtime_lock); + ui->api_widget->setEnabled(runtime_lock); ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock); ui->use_disk_shader_cache->setEnabled(runtime_lock); ui->use_nvdec_emulation->setEnabled(runtime_lock); @@ -83,7 +102,7 @@ void ConfigureGraphics::SetConfiguration() { ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio.GetValue()); } else { ConfigurationShared::SetPerGameSetting(ui->api, &Settings::values.renderer_backend); - ConfigurationShared::SetHighlight(ui->api_layout, + ConfigurationShared::SetHighlight(ui->api_widget, !Settings::values.renderer_backend.UsingGlobal()); ConfigurationShared::SetPerGameSetting(ui->fullscreen_mode_combobox, @@ -100,11 +119,10 @@ void ConfigureGraphics::SetConfiguration() { ui->bg_button->setEnabled(!Settings::values.bg_red.UsingGlobal()); ConfigurationShared::SetHighlight(ui->bg_layout, !Settings::values.bg_red.UsingGlobal()); } - UpdateBackgroundColorButton(QColor::fromRgb(Settings::values.bg_red.GetValue(), Settings::values.bg_green.GetValue(), Settings::values.bg_blue.GetValue())); - UpdateDeviceComboBox(); + UpdateAPILayout(); } void ConfigureGraphics::ApplyConfiguration() { @@ -128,6 +146,9 @@ void ConfigureGraphics::ApplyConfiguration() { if (Settings::values.renderer_backend.UsingGlobal()) { Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend()); } + if (Settings::values.shader_backend.UsingGlobal()) { + Settings::values.shader_backend.SetValue(shader_backend); + } if (Settings::values.vulkan_device.UsingGlobal()) { 
Settings::values.vulkan_device.SetValue(vulkan_device); } @@ -139,15 +160,22 @@ void ConfigureGraphics::ApplyConfiguration() { } else { if (ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { Settings::values.renderer_backend.SetGlobal(true); + Settings::values.shader_backend.SetGlobal(true); Settings::values.vulkan_device.SetGlobal(true); } else { Settings::values.renderer_backend.SetGlobal(false); Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend()); - if (GetCurrentGraphicsBackend() == Settings::RendererBackend::Vulkan) { + switch (GetCurrentGraphicsBackend()) { + case Settings::RendererBackend::OpenGL: + Settings::values.shader_backend.SetGlobal(false); + Settings::values.vulkan_device.SetGlobal(true); + Settings::values.shader_backend.SetValue(shader_backend); + break; + case Settings::RendererBackend::Vulkan: + Settings::values.shader_backend.SetGlobal(true); Settings::values.vulkan_device.SetGlobal(false); Settings::values.vulkan_device.SetValue(vulkan_device); - } else { - Settings::values.vulkan_device.SetGlobal(true); + break; } } @@ -188,32 +216,32 @@ void ConfigureGraphics::UpdateBackgroundColorButton(QColor color) { ui->bg_button->setIcon(color_icon); } -void ConfigureGraphics::UpdateDeviceComboBox() { - ui->device->clear(); - - bool enabled = false; - +void ConfigureGraphics::UpdateAPILayout() { if (!Settings::IsConfiguringGlobal() && ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { + vulkan_device = Settings::values.vulkan_device.GetValue(true); + shader_backend = Settings::values.shader_backend.GetValue(true); + ui->device_widget->setEnabled(false); + ui->backend_widget->setEnabled(false); + } else { vulkan_device = Settings::values.vulkan_device.GetValue(); + shader_backend = Settings::values.shader_backend.GetValue(); + ui->device_widget->setEnabled(true); + ui->backend_widget->setEnabled(true); } + switch (GetCurrentGraphicsBackend()) { case Settings::RendererBackend::OpenGL: - ui->device->addItem(tr("OpenGL Graphics Device")); - enabled = false; + ui->backend->setCurrentIndex(static_cast<u32>(shader_backend)); + ui->device_widget->setVisible(false); + ui->backend_widget->setVisible(true); break; case Settings::RendererBackend::Vulkan: - for (const auto& device : vulkan_devices) { - ui->device->addItem(device); - } ui->device->setCurrentIndex(vulkan_device); - enabled = !vulkan_devices.empty(); + ui->device_widget->setVisible(true); + ui->backend_widget->setVisible(false); break; } - // If in per-game config and use global is selected, don't enable. 
- enabled &= !(!Settings::IsConfiguringGlobal() && - ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX); - ui->device->setEnabled(enabled && !Core::System::GetInstance().IsPoweredOn()); } void ConfigureGraphics::RetrieveVulkanDevices() try { diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h index 6418115cf..c866b911b 100644 --- a/src/yuzu/configuration/configure_graphics.h +++ b/src/yuzu/configuration/configure_graphics.h @@ -34,8 +34,9 @@ private: void SetConfiguration(); void UpdateBackgroundColorButton(QColor color); - void UpdateDeviceComboBox(); + void UpdateAPILayout(); void UpdateDeviceSelection(int device); + void UpdateShaderBackendSelection(int backend); void RetrieveVulkanDevices(); @@ -53,4 +54,5 @@ private: std::vector<QString> vulkan_devices; u32 vulkan_device{}; + Settings::ShaderBackend shader_backend{}; }; diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index 5b999d84d..099ddbb7c 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -23,7 +23,7 @@ </property> <layout class="QVBoxLayout" name="verticalLayout_3"> <item> - <widget class="QWidget" name="api_layout" native="true"> + <widget class="QWidget" name="api_widget" native="true"> <layout class="QGridLayout" name="gridLayout"> <property name="leftMargin"> <number>0</number> @@ -40,37 +40,107 @@ <property name="horizontalSpacing"> <number>6</number> </property> - <item row="0" column="0"> - <widget class="QLabel" name="api_label"> - <property name="text"> - <string>API:</string> - </property> + <item row="4" column="0"> + <widget class="QWidget" name="backend_widget" native="true"> + <layout class="QHBoxLayout" name="backend_layout"> + <property name="leftMargin"> + <number>0</number> + </property> + <property name="topMargin"> + <number>0</number> + </property> + <property name="rightMargin"> + <number>0</number> + </property> + <property name="bottomMargin"> + <number>0</number> + </property> + <item> + <widget class="QLabel" name="backend_label"> + <property name="text"> + <string>Shader Backend:</string> + </property> + </widget> + </item> + <item> + <widget class="QComboBox" name="backend"/> + </item> + </layout> </widget> </item> - <item row="0" column="1"> - <widget class="QComboBox" name="api"> - <item> - <property name="text"> - <string notr="true">OpenGL</string> + <item row="2" column="0"> + <widget class="QWidget" name="device_widget" native="true"> + <layout class="QHBoxLayout" name="device_layout"> + <property name="leftMargin"> + <number>0</number> </property> - </item> - <item> - <property name="text"> - <string notr="true">Vulkan</string> + <property name="topMargin"> + <number>0</number> </property> - </item> + <property name="rightMargin"> + <number>0</number> + </property> + <property name="bottomMargin"> + <number>0</number> + </property> + <item> + <widget class="QLabel" name="device_label"> + <property name="text"> + <string>Device:</string> + </property> + </widget> + </item> + <item> + <widget class="QComboBox" name="device"/> + </item> + </layout> </widget> </item> - <item row="1" column="0"> - <widget class="QLabel" name="device_label"> - <property name="text"> - <string>Device:</string> - </property> + <item row="0" column="0"> + <widget class="QWidget" name="api_layout_2" native="true"> + <layout class="QHBoxLayout" name="api_layout"> + <property name="leftMargin"> + <number>0</number> + </property> + <property 
name="topMargin"> + <number>0</number> + </property> + <property name="rightMargin"> + <number>0</number> + </property> + <property name="bottomMargin"> + <number>0</number> + </property> + <item> + <widget class="QLabel" name="api_label"> + <property name="text"> + <string>API:</string> + </property> + </widget> + </item> + <item> + <widget class="QComboBox" name="api"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Preferred" vsizetype="Fixed"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <item> + <property name="text"> + <string notr="true">OpenGL</string> + </property> + </item> + <item> + <property name="text"> + <string notr="true">Vulkan</string> + </property> + </item> + </widget> + </item> + </layout> </widget> </item> - <item row="1" column="1"> - <widget class="QComboBox" name="device"/> - </item> </layout> </widget> </item> diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index a9e611125..38276feb1 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp @@ -23,12 +23,10 @@ ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default; void ConfigureGraphicsAdvanced::SetConfiguration() { const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); ui->use_vsync->setEnabled(runtime_lock); - ui->use_assembly_shaders->setEnabled(runtime_lock); ui->use_asynchronous_shaders->setEnabled(runtime_lock); ui->anisotropic_filtering_combobox->setEnabled(runtime_lock); ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue()); - ui->use_assembly_shaders->setChecked(Settings::values.use_assembly_shaders.GetValue()); ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue()); ui->use_caches_gc->setChecked(Settings::values.use_caches_gc.GetValue()); ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue()); @@ -58,8 +56,6 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() { ConfigurationShared::ApplyPerGameSetting(&Settings::values.max_anisotropy, ui->anisotropic_filtering_combobox); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync, ui->use_vsync, use_vsync); - ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_assembly_shaders, - ui->use_assembly_shaders, use_assembly_shaders); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders, ui->use_asynchronous_shaders, use_asynchronous_shaders); @@ -100,7 +96,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { if (Settings::IsConfiguringGlobal()) { ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal()); ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal()); - ui->use_assembly_shaders->setEnabled(Settings::values.use_assembly_shaders.UsingGlobal()); ui->use_asynchronous_shaders->setEnabled( Settings::values.use_asynchronous_shaders.UsingGlobal()); ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal()); @@ -112,8 +107,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { } ConfigurationShared::SetColoredTristate(ui->use_vsync, Settings::values.use_vsync, use_vsync); - ConfigurationShared::SetColoredTristate( - ui->use_assembly_shaders, Settings::values.use_assembly_shaders, use_assembly_shaders); ConfigurationShared::SetColoredTristate(ui->use_asynchronous_shaders, Settings::values.use_asynchronous_shaders, use_asynchronous_shaders); diff --git 
a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h index 9148aacf2..7356e6916 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.h +++ b/src/yuzu/configuration/configure_graphics_advanced.h @@ -35,7 +35,6 @@ private: std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui; ConfigurationShared::CheckState use_vsync; - ConfigurationShared::CheckState use_assembly_shaders; ConfigurationShared::CheckState use_asynchronous_shaders; ConfigurationShared::CheckState use_fast_gpu_time; ConfigurationShared::CheckState use_caches_gc; diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index ad0840355..379dc5d2e 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -77,22 +77,12 @@ </widget> </item> <item> - <widget class="QCheckBox" name="use_assembly_shaders"> - <property name="toolTip"> - <string>Enabling this reduces shader stutter. Enables OpenGL assembly shaders on supported Nvidia devices (NV_gpu_program5 is required). This feature is experimental.</string> - </property> - <property name="text"> - <string>Use assembly shaders (experimental, Nvidia OpenGL only)</string> - </property> - </widget> - </item> - <item> <widget class="QCheckBox" name="use_asynchronous_shaders"> <property name="toolTip"> <string>Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental.</string> </property> <property name="text"> - <string>Use asynchronous shader building (experimental)</string> + <string>Use asynchronous shader building</string> </property> </widget> </item> @@ -144,22 +134,22 @@ </item> <item> <property name="text"> - <string>2x</string> + <string>2x (WILL BREAK THINGS)</string> </property> </item> <item> <property name="text"> - <string>4x</string> + <string>4x (WILL BREAK THINGS)</string> </property> </item> <item> <property name="text"> - <string>8x</string> + <string>8x (WILL BREAK THINGS)</string> </property> </item> <item> <property name="text"> - <string>16x</string> + <string>16x (WILL BREAK THINGS)</string> </property> </item> </widget> diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp index 76c063c97..f746bd85d 100644 --- a/src/yuzu/game_list.cpp +++ b/src/yuzu/game_list.cpp @@ -520,9 +520,11 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri QMenu* remove_menu = context_menu.addMenu(tr("Remove")); QAction* remove_update = remove_menu->addAction(tr("Remove Installed Update")); QAction* remove_dlc = remove_menu->addAction(tr("Remove All Installed DLC")); - QAction* remove_shader_cache = remove_menu->addAction(tr("Remove Shader Cache")); QAction* remove_custom_config = remove_menu->addAction(tr("Remove Custom Configuration")); + QAction* remove_gl_shader_cache = remove_menu->addAction(tr("Remove OpenGL Shader Cache")); + QAction* remove_vk_shader_cache = remove_menu->addAction(tr("Remove Vulkan Shader Cache")); remove_menu->addSeparator(); + QAction* remove_shader_cache = remove_menu->addAction(tr("Remove All Shader Caches")); QAction* remove_all_content = remove_menu->addAction(tr("Remove All Installed Contents")); QMenu* dump_romfs_menu = context_menu.addMenu(tr("Dump RomFS")); QAction* dump_romfs = dump_romfs_menu->addAction(tr("Dump RomFS")); @@ -540,6 +542,8 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri 
open_transferable_shader_cache->setVisible(program_id != 0); remove_update->setVisible(program_id != 0); remove_dlc->setVisible(program_id != 0); + remove_gl_shader_cache->setVisible(program_id != 0); + remove_vk_shader_cache->setVisible(program_id != 0); remove_shader_cache->setVisible(program_id != 0); remove_all_content->setVisible(program_id != 0); auto it = FindMatchingCompatibilityEntry(compatibility_list, program_id); @@ -569,8 +573,14 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri connect(remove_dlc, &QAction::triggered, [this, program_id]() { emit RemoveInstalledEntryRequested(program_id, InstalledEntryType::AddOnContent); }); + connect(remove_gl_shader_cache, &QAction::triggered, [this, program_id, path]() { + emit RemoveFileRequested(program_id, GameListRemoveTarget::GlShaderCache, path); + }); + connect(remove_vk_shader_cache, &QAction::triggered, [this, program_id, path]() { + emit RemoveFileRequested(program_id, GameListRemoveTarget::VkShaderCache, path); + }); connect(remove_shader_cache, &QAction::triggered, [this, program_id, path]() { - emit RemoveFileRequested(program_id, GameListRemoveTarget::ShaderCache, path); + emit RemoveFileRequested(program_id, GameListRemoveTarget::AllShaderCache, path); }); connect(remove_custom_config, &QAction::triggered, [this, program_id, path]() { emit RemoveFileRequested(program_id, GameListRemoveTarget::CustomConfiguration, path); diff --git a/src/yuzu/game_list.h b/src/yuzu/game_list.h index c9a9f4654..10339dcca 100644 --- a/src/yuzu/game_list.h +++ b/src/yuzu/game_list.h @@ -41,7 +41,9 @@ enum class GameListOpenTarget { }; enum class GameListRemoveTarget { - ShaderCache, + GlShaderCache, + VkShaderCache, + AllShaderCache, CustomConfiguration, }; diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 03a909d17..a5159a1ee 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -1654,35 +1654,15 @@ void GMainWindow::OnGameListOpenFolder(u64 program_id, GameListOpenTarget target void GMainWindow::OnTransferableShaderCacheOpenFile(u64 program_id) { const auto shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir); - const auto transferable_shader_cache_folder_path = shader_cache_dir / "opengl" / "transferable"; - const auto transferable_shader_cache_file_path = - transferable_shader_cache_folder_path / fmt::format("{:016X}.bin", program_id); - - if (!Common::FS::Exists(transferable_shader_cache_file_path)) { + const auto shader_cache_folder_path{shader_cache_dir / fmt::format("{:016x}", program_id)}; + if (!Common::FS::CreateDirs(shader_cache_folder_path)) { QMessageBox::warning(this, tr("Error Opening Transferable Shader Cache"), - tr("A shader cache for this title does not exist.")); + tr("Failed to create the shader cache directory for this title.")); return; } - - const auto qt_shader_cache_folder_path = - QString::fromStdString(Common::FS::PathToUTF8String(transferable_shader_cache_folder_path)); - const auto qt_shader_cache_file_path = - QString::fromStdString(Common::FS::PathToUTF8String(transferable_shader_cache_file_path)); - - // Windows supports opening a folder with selecting a specified file in explorer. On every other - // OS we just open the transferable shader cache folder without preselecting the transferable - // shader cache file for the selected game. 
-#if defined(Q_OS_WIN) - const QString explorer = QStringLiteral("explorer"); - QStringList param; - if (!QFileInfo(qt_shader_cache_file_path).isDir()) { - param << QStringLiteral("/select,"); - } - param << QDir::toNativeSeparators(qt_shader_cache_file_path); - QProcess::startDetached(explorer, param); -#else - QDesktopServices::openUrl(QUrl::fromLocalFile(qt_shader_cache_folder_path)); -#endif + const auto shader_path_string{Common::FS::PathToUTF8String(shader_cache_folder_path)}; + const auto qt_shader_cache_path = QString::fromStdString(shader_path_string); + QDesktopServices::openUrl(QUrl::fromLocalFile(qt_shader_cache_path)); } static std::size_t CalculateRomFSEntrySize(const FileSys::VirtualDir& dir, bool full) { @@ -1825,8 +1805,12 @@ void GMainWindow::OnGameListRemoveFile(u64 program_id, GameListRemoveTarget targ const std::string& game_path) { const QString question = [this, target] { switch (target) { - case GameListRemoveTarget::ShaderCache: - return tr("Delete Transferable Shader Cache?"); + case GameListRemoveTarget::GlShaderCache: + return tr("Delete OpenGL Transferable Shader Cache?"); + case GameListRemoveTarget::VkShaderCache: + return tr("Delete Vulkan Transferable Shader Cache?"); + case GameListRemoveTarget::AllShaderCache: + return tr("Delete All Transferable Shader Caches?"); case GameListRemoveTarget::CustomConfiguration: return tr("Remove Custom Game Configuration?"); default: @@ -1840,8 +1824,12 @@ void GMainWindow::OnGameListRemoveFile(u64 program_id, GameListRemoveTarget targ } switch (target) { - case GameListRemoveTarget::ShaderCache: - RemoveTransferableShaderCache(program_id); + case GameListRemoveTarget::GlShaderCache: + case GameListRemoveTarget::VkShaderCache: + RemoveTransferableShaderCache(program_id, target); + break; + case GameListRemoveTarget::AllShaderCache: + RemoveAllTransferableShaderCaches(program_id); break; case GameListRemoveTarget::CustomConfiguration: RemoveCustomConfiguration(program_id, game_path); @@ -1849,18 +1837,27 @@ void GMainWindow::OnGameListRemoveFile(u64 program_id, GameListRemoveTarget targ } } -void GMainWindow::RemoveTransferableShaderCache(u64 program_id) { +void GMainWindow::RemoveTransferableShaderCache(u64 program_id, GameListRemoveTarget target) { + const auto target_file_name = [target] { + switch (target) { + case GameListRemoveTarget::GlShaderCache: + return "opengl.bin"; + case GameListRemoveTarget::VkShaderCache: + return "vulkan.bin"; + default: + return ""; + } + }(); const auto shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir); - const auto transferable_shader_cache_file_path = - shader_cache_dir / "opengl" / "transferable" / fmt::format("{:016X}.bin", program_id); + const auto shader_cache_folder_path = shader_cache_dir / fmt::format("{:016x}", program_id); + const auto target_file = shader_cache_folder_path / target_file_name; - if (!Common::FS::Exists(transferable_shader_cache_file_path)) { + if (!Common::FS::Exists(target_file)) { QMessageBox::warning(this, tr("Error Removing Transferable Shader Cache"), tr("A shader cache for this title does not exist.")); return; } - - if (Common::FS::RemoveFile(transferable_shader_cache_file_path)) { + if (Common::FS::RemoveFile(target_file)) { QMessageBox::information(this, tr("Successfully Removed"), tr("Successfully removed the transferable shader cache.")); } else { @@ -1869,6 +1866,24 @@ void GMainWindow::RemoveTransferableShaderCache(u64 program_id) { } } +void GMainWindow::RemoveAllTransferableShaderCaches(u64 program_id) { + const auto 
shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir); + const auto program_shader_cache_dir = shader_cache_dir / fmt::format("{:016x}", program_id); + + if (!Common::FS::Exists(program_shader_cache_dir)) { + QMessageBox::warning(this, tr("Error Removing Transferable Shader Caches"), + tr("A shader cache for this title does not exist.")); + return; + } + if (Common::FS::RemoveDirRecursively(program_shader_cache_dir)) { + QMessageBox::information(this, tr("Successfully Removed"), + tr("Successfully removed the transferable shader caches.")); + } else { + QMessageBox::warning(this, tr("Error Removing Transferable Shader Caches"), + tr("Failed to remove the transferable shader cache directory.")); + } +} + void GMainWindow::RemoveCustomConfiguration(u64 program_id, const std::string& game_path) { const auto file_path = std::filesystem::path(Common::FS::ToU8String(game_path)); const auto config_file_name = @@ -2900,13 +2915,13 @@ void GMainWindow::UpdateStatusBar() { return; } - auto results = Core::System::GetInstance().GetAndResetPerfStats(); - auto& shader_notify = Core::System::GetInstance().GPU().ShaderNotify(); - const auto shaders_building = shader_notify.GetShadersBuilding(); + auto& system = Core::System::GetInstance(); + auto results = system.GetAndResetPerfStats(); + auto& shader_notify = system.GPU().ShaderNotify(); + const int shaders_building = shader_notify.ShadersBuilding(); - if (shaders_building != 0) { - shader_building_label->setText( - tr("Building: %n shader(s)", "", static_cast<int>(shaders_building))); + if (shaders_building > 0) { + shader_building_label->setText(tr("Building: %n shader(s)", "", shaders_building)); shader_building_label->setVisible(true); } else { shader_building_label->setVisible(false); diff --git a/src/yuzu/main.h b/src/yuzu/main.h index a50e5b9fe..3eb6aed56 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h @@ -282,7 +282,8 @@ private: void RemoveBaseContent(u64 program_id, const QString& entry_type); void RemoveUpdateContent(u64 program_id, const QString& entry_type); void RemoveAddOnContent(u64 program_id, const QString& entry_type); - void RemoveTransferableShaderCache(u64 program_id); + void RemoveTransferableShaderCache(u64 program_id, GameListRemoveTarget target); + void RemoveAllTransferableShaderCaches(u64 program_id); void RemoveCustomConfiguration(u64 program_id, const std::string& game_path); std::optional<u64> SelectRomFSDumpTarget(const FileSys::ContentProvider&, u64 program_id); InstallResult InstallNSPXCI(const QString& filename); diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 3e22fee37..640d7d111 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -444,6 +444,8 @@ void Config::ReadValues() { // Renderer ReadSetting("Renderer", Settings::values.renderer_backend); ReadSetting("Renderer", Settings::values.renderer_debug); + ReadSetting("Renderer", Settings::values.enable_nsight_aftermath); + ReadSetting("Renderer", Settings::values.disable_shader_loop_safety_checks); ReadSetting("Renderer", Settings::values.vulkan_device); ReadSetting("Renderer", Settings::values.fullscreen_mode); @@ -456,7 +458,7 @@ void Config::ReadValues() { ReadSetting("Renderer", Settings::values.use_asynchronous_gpu_emulation); ReadSetting("Renderer", Settings::values.use_vsync); ReadSetting("Renderer", Settings::values.disable_fps_limit); - ReadSetting("Renderer", Settings::values.use_assembly_shaders); + ReadSetting("Renderer", Settings::values.shader_backend); ReadSetting("Renderer", 
Settings::values.use_asynchronous_shaders); ReadSetting("Renderer", Settings::values.use_nvdec_emulation); ReadSetting("Renderer", Settings::values.accelerate_astc); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 88d33ecab..b7115b06a 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -221,6 +221,14 @@ backend = # 0 (default): Disabled, 1: Enabled debug = +# Enable Nsight Aftermath crash dumps +# 0 (default): Disabled, 1: Enabled +nsight_aftermath = + +# Disable shader loop safety checks, executing the shader without loop logic changes +# 0 (default): Disabled, 1: Enabled +disable_shader_loop_safety_checks = + # Which Vulkan physical device to use (defaults to 0) vulkan_device = @@ -240,9 +248,10 @@ max_anisotropy = # 0 (default): Off, 1: On use_vsync = -# Whether to use OpenGL assembly shaders or not. NV_gpu_program5 is required. -# 0: Off, 1 (default): On -use_assembly_shaders = +# Selects the OpenGL shader backend. NV_gpu_program5 is required for GLASM. If NV_gpu_program5 is +# not available and GLASM is selected, GLSL will be used. +# 0: GLSL, 1 (default): GLASM, 2: SPIR-V +shader_backend = # Whether to allow asynchronous shader building. # 0 (default): Off, 1: On diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp index ac4ea88d3..35ce23696 100644 --- a/src/yuzu_cmd/yuzu.cpp +++ b/src/yuzu_cmd/yuzu.cpp @@ -218,9 +218,11 @@ int main(int argc, char** argv) { // Core is loaded, start the GPU (makes the GPU contexts current to this thread) system.GPU().Start(); - system.Renderer().ReadRasterizer()->LoadDiskResources( - system.CurrentProcess()->GetTitleID(), std::stop_token{}, - [](VideoCore::LoadCallbackStage, size_t value, size_t total) {}); + if (Settings::values.use_disk_shader_cache.GetValue()) { + system.Renderer().ReadRasterizer()->LoadDiskResources( + system.CurrentProcess()->GetTitleID(), std::stop_token{}, + [](VideoCore::LoadCallbackStage, size_t value, size_t total) {}); + } void(system.Run()); while (emu_window->IsOpen()) { |