path: root/src/shader_recompiler
author    bunnei <bunneidev@gmail.com>  2021-07-25 20:39:04 +0200
committer GitHub <noreply@github.com>  2021-07-25 20:39:04 +0200
commit    98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f (patch)
tree      816faa96c2c4d291825063433331a8ea4b3d08f1 /src/shader_recompiler
parent    Merge pull request #6699 from lat9nq/common-threads (diff)
parent    shader: Support out of bound local memory reads and immediate writes (diff)
Diffstat (limited to 'src/shader_recompiler')
-rw-r--r-- src/shader_recompiler/CMakeLists.txt | 268
-rw-r--r-- src/shader_recompiler/backend/bindings.h | 19
-rw-r--r-- src/shader_recompiler/backend/glasm/emit_context.cpp | 154
-rw-r--r-- src/shader_recompiler/backend/glasm/emit_context.h | 80
-rw-r--r-- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 492
-rw-r--r-- src/shader_recompiler/backend/glasm/emit_glasm.h | 25
-rw-r--r-- src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp | 0
-rw-r--r-- src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp | 91
-rw-r--r-- src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp | 244
-rw-r--r-- src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp | 346
-rw-r--r-- src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp | 0
-rw-r--r-- src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp | 231
-rw-r--r-- src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp | 414
-rw-r--r-- src/shader_recompiler/backend/glasm/emit_glasm_image.cpp | 850
-rw-r--r-- src/shader_recompiler/backend/glasm/emit_glasm_instructions.h | 625
-rw-r--r-- src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp | 294
-rw-r--r-- src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp | 0
-rw-r--r-- src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp | 568
-rw-r--r-- src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp | 273
-rw-r--r-- src/shader_recompiler/backend/glasm/emit_glasm_select.cpp | 67
-rw-r--r-- src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp | 58
-rw-r--r-- src/shader_recompiler/backend/glasm/emit_glasm_special.cpp | 0
-rw-r--r-- src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp | 0
-rw-r--r-- src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp | 150
-rw-r--r-- src/shader_recompiler/backend/glasm/reg_alloc.cpp | 186
-rw-r--r-- src/shader_recompiler/backend/glasm/reg_alloc.h | 303
-rw-r--r-- src/shader_recompiler/backend/glsl/emit_context.cpp | 715
-rw-r--r-- src/shader_recompiler/backend/glsl/emit_context.h | 174
-rw-r--r-- src/shader_recompiler/backend/glsl/emit_glsl.cpp | 252
-rw-r--r-- src/shader_recompiler/backend/glsl/emit_glsl.h | 24
-rw-r--r-- src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp | 418
-rw-r--r-- src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp | 21
-rw-r--r-- src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp | 94
-rw-r--r-- src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp | 219
-rw-r--r-- src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp | 456
-rw-r--r-- src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp | 21
-rw-r--r-- src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp | 230
-rw-r--r-- src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp | 456
-rw-r--r-- src/shader_recompiler/backend/glsl/emit_glsl_image.cpp | 799
-rw-r--r-- src/shader_recompiler/backend/glsl/emit_glsl_instructions.h | 702
-rw-r--r-- src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp | 253
-rw-r--r-- src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp | 28
-rw-r--r-- src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp | 202
-rw-r--r-- src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp | 105
-rw-r--r-- src/shader_recompiler/backend/glsl/emit_glsl_select.cpp | 55
-rw-r--r-- src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp | 79
-rw-r--r-- src/shader_recompiler/backend/glsl/emit_glsl_special.cpp | 111
-rw-r--r-- src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp | 32
-rw-r--r-- src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp | 217
-rw-r--r-- src/shader_recompiler/backend/glsl/var_alloc.cpp | 308
-rw-r--r-- src/shader_recompiler/backend/glsl/var_alloc.h | 105
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_context.cpp | 1368
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_context.h | 307
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 541
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv.h | 27
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp | 448
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp | 38
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp | 66
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp | 155
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp | 505
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp | 28
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp | 269
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp | 396
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | 462
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp | 183
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv_instructions.h | 579
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp | 270
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp | 26
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp | 275
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv_select.cpp | 42
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp | 174
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv_special.cpp | 150
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp | 30
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp | 203
-rw-r--r-- src/shader_recompiler/environment.h | 53
-rw-r--r-- src/shader_recompiler/exception.h | 66
-rw-r--r-- src/shader_recompiler/frontend/ir/abstract_syntax_list.h | 58
-rw-r--r-- src/shader_recompiler/frontend/ir/attribute.cpp | 454
-rw-r--r-- src/shader_recompiler/frontend/ir/attribute.h | 250
-rw-r--r-- src/shader_recompiler/frontend/ir/basic_block.cpp | 149
-rw-r--r-- src/shader_recompiler/frontend/ir/basic_block.h | 185
-rw-r--r-- src/shader_recompiler/frontend/ir/breadth_first_search.h | 56
-rw-r--r-- src/shader_recompiler/frontend/ir/condition.cpp | 29
-rw-r--r-- src/shader_recompiler/frontend/ir/condition.h | 60
-rw-r--r-- src/shader_recompiler/frontend/ir/flow_test.cpp | 83
-rw-r--r-- src/shader_recompiler/frontend/ir/flow_test.h | 62
-rw-r--r-- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 2017
-rw-r--r-- src/shader_recompiler/frontend/ir/ir_emitter.h | 413
-rw-r--r-- src/shader_recompiler/frontend/ir/microinstruction.cpp | 411
-rw-r--r-- src/shader_recompiler/frontend/ir/modifiers.h | 49
-rw-r--r-- src/shader_recompiler/frontend/ir/opcodes.cpp | 15
-rw-r--r-- src/shader_recompiler/frontend/ir/opcodes.h | 110
-rw-r--r-- src/shader_recompiler/frontend/ir/opcodes.inc | 550
-rw-r--r-- src/shader_recompiler/frontend/ir/patch.cpp | 28
-rw-r--r-- src/shader_recompiler/frontend/ir/patch.h | 149
-rw-r--r-- src/shader_recompiler/frontend/ir/post_order.cpp | 46
-rw-r--r-- src/shader_recompiler/frontend/ir/post_order.h | 14
-rw-r--r-- src/shader_recompiler/frontend/ir/pred.h | 44
-rw-r--r-- src/shader_recompiler/frontend/ir/program.cpp | 32
-rw-r--r-- src/shader_recompiler/frontend/ir/program.h | 35
-rw-r--r-- src/shader_recompiler/frontend/ir/reg.h | 332
-rw-r--r-- src/shader_recompiler/frontend/ir/type.cpp | 38
-rw-r--r-- src/shader_recompiler/frontend/ir/type.h | 61
-rw-r--r-- src/shader_recompiler/frontend/ir/value.cpp | 99
-rw-r--r-- src/shader_recompiler/frontend/ir/value.h | 398
-rw-r--r-- src/shader_recompiler/frontend/maxwell/control_flow.cpp | 642
-rw-r--r-- src/shader_recompiler/frontend/maxwell/control_flow.h | 169
-rw-r--r-- src/shader_recompiler/frontend/maxwell/decode.cpp | 149
-rw-r--r-- src/shader_recompiler/frontend/maxwell/decode.h | 14
-rw-r--r-- src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp | 108
-rw-r--r-- src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h | 28
-rw-r--r-- src/shader_recompiler/frontend/maxwell/instruction.h | 63
-rw-r--r-- src/shader_recompiler/frontend/maxwell/location.h | 112
-rw-r--r-- src/shader_recompiler/frontend/maxwell/maxwell.inc | 286
-rw-r--r-- src/shader_recompiler/frontend/maxwell/opcodes.cpp | 26
-rw-r--r-- src/shader_recompiler/frontend/maxwell/opcodes.h | 30
-rw-r--r-- src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp | 883
-rw-r--r-- src/shader_recompiler/frontend/maxwell/structured_control_flow.h | 20
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp | 214
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp | 110
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp | 35
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp | 96
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp | 74
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp | 62
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp | 36
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h | 57
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp | 153
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h | 28
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp | 66
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp | 55
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp | 72
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp | 58
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp | 55
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp | 50
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp | 54
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp | 43
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp | 47
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp | 82
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp | 55
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp | 78
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp | 214
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp | 253
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp | 94
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp | 62
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp | 71
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp | 127
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp | 41
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp | 60
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp | 44
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp | 125
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp | 169
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp | 62
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h | 42
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp | 143
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp | 117
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp | 118
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp | 272
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/impl.h | 387
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp | 105
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp | 122
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp | 48
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp | 80
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp | 182
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp | 82
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp | 64
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp | 36
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp | 86
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp | 58
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp | 71
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp | 66
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp | 135
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp | 126
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp | 53
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp | 62
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h | 39
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp | 108
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp | 196
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp | 218
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp | 184
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp | 116
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp | 122
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp | 66
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp | 44
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp | 71
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp | 181
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp | 283
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp | 45
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp | 46
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp | 38
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp | 53
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp | 44
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp | 205
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp | 281
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp | 236
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp | 266
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp | 208
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp | 134
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp | 182
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp | 165
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp | 242
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp | 131
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp | 76
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp | 30
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h | 23
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp | 92
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp | 64
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp | 92
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp | 54
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp | 69
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/translate.cpp | 52
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/translate.h | 14
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate_program.cpp | 223
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate_program.h | 23
-rw-r--r-- src/shader_recompiler/host_translate_info.h | 18
-rw-r--r-- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 928
-rw-r--r-- src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | 610
-rw-r--r-- src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp | 26
-rw-r--r-- src/shader_recompiler/ir_opt/dual_vertex_pass.cpp | 30
-rw-r--r-- src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp | 526
-rw-r--r-- src/shader_recompiler/ir_opt/identity_removal_pass.cpp | 38
-rw-r--r-- src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp | 143
-rw-r--r-- src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp | 218
-rw-r--r-- src/shader_recompiler/ir_opt/passes.h | 32
-rw-r--r-- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 383
-rw-r--r-- src/shader_recompiler/ir_opt/texture_pass.cpp | 523
-rw-r--r-- src/shader_recompiler/ir_opt/verification_pass.cpp | 98
-rw-r--r-- src/shader_recompiler/object_pool.h | 104
-rw-r--r-- src/shader_recompiler/profile.h | 74
-rw-r--r-- src/shader_recompiler/program_header.h | 219
-rw-r--r-- src/shader_recompiler/runtime_info.h | 88
-rw-r--r-- src/shader_recompiler/shader_info.h | 193
-rw-r--r-- src/shader_recompiler/stage.h | 28
-rw-r--r-- src/shader_recompiler/varying_state.h | 69
233 files changed, 41653 insertions, 0 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
new file mode 100644
index 000000000..b5b7e5e83
--- /dev/null
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -0,0 +1,268 @@
+add_library(shader_recompiler STATIC
+ backend/bindings.h
+ backend/glasm/emit_context.cpp
+ backend/glasm/emit_context.h
+ backend/glasm/emit_glasm.cpp
+ backend/glasm/emit_glasm.h
+ backend/glasm/emit_glasm_barriers.cpp
+ backend/glasm/emit_glasm_bitwise_conversion.cpp
+ backend/glasm/emit_glasm_composite.cpp
+ backend/glasm/emit_glasm_context_get_set.cpp
+ backend/glasm/emit_glasm_control_flow.cpp
+ backend/glasm/emit_glasm_convert.cpp
+ backend/glasm/emit_glasm_floating_point.cpp
+ backend/glasm/emit_glasm_image.cpp
+ backend/glasm/emit_glasm_instructions.h
+ backend/glasm/emit_glasm_integer.cpp
+ backend/glasm/emit_glasm_logical.cpp
+ backend/glasm/emit_glasm_memory.cpp
+ backend/glasm/emit_glasm_not_implemented.cpp
+ backend/glasm/emit_glasm_select.cpp
+ backend/glasm/emit_glasm_shared_memory.cpp
+ backend/glasm/emit_glasm_special.cpp
+ backend/glasm/emit_glasm_undefined.cpp
+ backend/glasm/emit_glasm_warp.cpp
+ backend/glasm/reg_alloc.cpp
+ backend/glasm/reg_alloc.h
+ backend/glsl/emit_context.cpp
+ backend/glsl/emit_context.h
+ backend/glsl/emit_glsl.cpp
+ backend/glsl/emit_glsl.h
+ backend/glsl/emit_glsl_atomic.cpp
+ backend/glsl/emit_glsl_barriers.cpp
+ backend/glsl/emit_glsl_bitwise_conversion.cpp
+ backend/glsl/emit_glsl_composite.cpp
+ backend/glsl/emit_glsl_context_get_set.cpp
+ backend/glsl/emit_glsl_control_flow.cpp
+ backend/glsl/emit_glsl_convert.cpp
+ backend/glsl/emit_glsl_floating_point.cpp
+ backend/glsl/emit_glsl_image.cpp
+ backend/glsl/emit_glsl_instructions.h
+ backend/glsl/emit_glsl_integer.cpp
+ backend/glsl/emit_glsl_logical.cpp
+ backend/glsl/emit_glsl_memory.cpp
+ backend/glsl/emit_glsl_not_implemented.cpp
+ backend/glsl/emit_glsl_select.cpp
+ backend/glsl/emit_glsl_shared_memory.cpp
+ backend/glsl/emit_glsl_special.cpp
+ backend/glsl/emit_glsl_undefined.cpp
+ backend/glsl/emit_glsl_warp.cpp
+ backend/glsl/var_alloc.cpp
+ backend/glsl/var_alloc.h
+ backend/spirv/emit_context.cpp
+ backend/spirv/emit_context.h
+ backend/spirv/emit_spirv.cpp
+ backend/spirv/emit_spirv.h
+ backend/spirv/emit_spirv_atomic.cpp
+ backend/spirv/emit_spirv_barriers.cpp
+ backend/spirv/emit_spirv_bitwise_conversion.cpp
+ backend/spirv/emit_spirv_composite.cpp
+ backend/spirv/emit_spirv_context_get_set.cpp
+ backend/spirv/emit_spirv_control_flow.cpp
+ backend/spirv/emit_spirv_convert.cpp
+ backend/spirv/emit_spirv_floating_point.cpp
+ backend/spirv/emit_spirv_image.cpp
+ backend/spirv/emit_spirv_image_atomic.cpp
+ backend/spirv/emit_spirv_instructions.h
+ backend/spirv/emit_spirv_integer.cpp
+ backend/spirv/emit_spirv_logical.cpp
+ backend/spirv/emit_spirv_memory.cpp
+ backend/spirv/emit_spirv_select.cpp
+ backend/spirv/emit_spirv_shared_memory.cpp
+ backend/spirv/emit_spirv_special.cpp
+ backend/spirv/emit_spirv_undefined.cpp
+ backend/spirv/emit_spirv_warp.cpp
+ environment.h
+ exception.h
+ frontend/ir/abstract_syntax_list.h
+ frontend/ir/attribute.cpp
+ frontend/ir/attribute.h
+ frontend/ir/basic_block.cpp
+ frontend/ir/basic_block.h
+ frontend/ir/breadth_first_search.h
+ frontend/ir/condition.cpp
+ frontend/ir/condition.h
+ frontend/ir/flow_test.cpp
+ frontend/ir/flow_test.h
+ frontend/ir/ir_emitter.cpp
+ frontend/ir/ir_emitter.h
+ frontend/ir/microinstruction.cpp
+ frontend/ir/modifiers.h
+ frontend/ir/opcodes.cpp
+ frontend/ir/opcodes.h
+ frontend/ir/opcodes.inc
+ frontend/ir/patch.cpp
+ frontend/ir/patch.h
+ frontend/ir/post_order.cpp
+ frontend/ir/post_order.h
+ frontend/ir/pred.h
+ frontend/ir/program.cpp
+ frontend/ir/program.h
+ frontend/ir/reg.h
+ frontend/ir/type.cpp
+ frontend/ir/type.h
+ frontend/ir/value.cpp
+ frontend/ir/value.h
+ frontend/maxwell/control_flow.cpp
+ frontend/maxwell/control_flow.h
+ frontend/maxwell/decode.cpp
+ frontend/maxwell/decode.h
+ frontend/maxwell/indirect_branch_table_track.cpp
+ frontend/maxwell/indirect_branch_table_track.h
+ frontend/maxwell/instruction.h
+ frontend/maxwell/location.h
+ frontend/maxwell/maxwell.inc
+ frontend/maxwell/opcodes.cpp
+ frontend/maxwell/opcodes.h
+ frontend/maxwell/structured_control_flow.cpp
+ frontend/maxwell/structured_control_flow.h
+ frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
+ frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
+ frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp
+ frontend/maxwell/translate/impl/barrier_operations.cpp
+ frontend/maxwell/translate/impl/bitfield_extract.cpp
+ frontend/maxwell/translate/impl/bitfield_insert.cpp
+ frontend/maxwell/translate/impl/branch_indirect.cpp
+ frontend/maxwell/translate/impl/common_encoding.h
+ frontend/maxwell/translate/impl/common_funcs.cpp
+ frontend/maxwell/translate/impl/common_funcs.h
+ frontend/maxwell/translate/impl/condition_code_set.cpp
+ frontend/maxwell/translate/impl/double_add.cpp
+ frontend/maxwell/translate/impl/double_compare_and_set.cpp
+ frontend/maxwell/translate/impl/double_fused_multiply_add.cpp
+ frontend/maxwell/translate/impl/double_min_max.cpp
+ frontend/maxwell/translate/impl/double_multiply.cpp
+ frontend/maxwell/translate/impl/double_set_predicate.cpp
+ frontend/maxwell/translate/impl/exit_program.cpp
+ frontend/maxwell/translate/impl/find_leading_one.cpp
+ frontend/maxwell/translate/impl/floating_point_add.cpp
+ frontend/maxwell/translate/impl/floating_point_compare.cpp
+ frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp
+ frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp
+ frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
+ frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
+ frontend/maxwell/translate/impl/floating_point_min_max.cpp
+ frontend/maxwell/translate/impl/floating_point_multi_function.cpp
+ frontend/maxwell/translate/impl/floating_point_multiply.cpp
+ frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
+ frontend/maxwell/translate/impl/floating_point_set_predicate.cpp
+ frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
+ frontend/maxwell/translate/impl/half_floating_point_add.cpp
+ frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
+ frontend/maxwell/translate/impl/half_floating_point_helper.cpp
+ frontend/maxwell/translate/impl/half_floating_point_helper.h
+ frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
+ frontend/maxwell/translate/impl/half_floating_point_set.cpp
+ frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp
+ frontend/maxwell/translate/impl/impl.cpp
+ frontend/maxwell/translate/impl/impl.h
+ frontend/maxwell/translate/impl/integer_add.cpp
+ frontend/maxwell/translate/impl/integer_add_three_input.cpp
+ frontend/maxwell/translate/impl/integer_compare.cpp
+ frontend/maxwell/translate/impl/integer_compare_and_set.cpp
+ frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
+ frontend/maxwell/translate/impl/integer_funnel_shift.cpp
+ frontend/maxwell/translate/impl/integer_minimum_maximum.cpp
+ frontend/maxwell/translate/impl/integer_popcount.cpp
+ frontend/maxwell/translate/impl/integer_scaled_add.cpp
+ frontend/maxwell/translate/impl/integer_set_predicate.cpp
+ frontend/maxwell/translate/impl/integer_shift_left.cpp
+ frontend/maxwell/translate/impl/integer_shift_right.cpp
+ frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
+ frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp
+ frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp
+ frontend/maxwell/translate/impl/load_constant.cpp
+ frontend/maxwell/translate/impl/load_constant.h
+ frontend/maxwell/translate/impl/load_effective_address.cpp
+ frontend/maxwell/translate/impl/load_store_attribute.cpp
+ frontend/maxwell/translate/impl/load_store_local_shared.cpp
+ frontend/maxwell/translate/impl/load_store_memory.cpp
+ frontend/maxwell/translate/impl/logic_operation.cpp
+ frontend/maxwell/translate/impl/logic_operation_three_input.cpp
+ frontend/maxwell/translate/impl/move_predicate_to_register.cpp
+ frontend/maxwell/translate/impl/move_register.cpp
+ frontend/maxwell/translate/impl/move_register_to_predicate.cpp
+ frontend/maxwell/translate/impl/move_special_register.cpp
+ frontend/maxwell/translate/impl/not_implemented.cpp
+ frontend/maxwell/translate/impl/output_geometry.cpp
+ frontend/maxwell/translate/impl/pixel_load.cpp
+ frontend/maxwell/translate/impl/predicate_set_predicate.cpp
+ frontend/maxwell/translate/impl/predicate_set_register.cpp
+ frontend/maxwell/translate/impl/select_source_with_predicate.cpp
+ frontend/maxwell/translate/impl/surface_atomic_operations.cpp
+ frontend/maxwell/translate/impl/surface_load_store.cpp
+ frontend/maxwell/translate/impl/texture_fetch.cpp
+ frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
+ frontend/maxwell/translate/impl/texture_gather.cpp
+ frontend/maxwell/translate/impl/texture_gather_swizzled.cpp
+ frontend/maxwell/translate/impl/texture_gradient.cpp
+ frontend/maxwell/translate/impl/texture_load.cpp
+ frontend/maxwell/translate/impl/texture_load_swizzled.cpp
+ frontend/maxwell/translate/impl/texture_mipmap_level.cpp
+ frontend/maxwell/translate/impl/texture_query.cpp
+ frontend/maxwell/translate/impl/video_helper.cpp
+ frontend/maxwell/translate/impl/video_helper.h
+ frontend/maxwell/translate/impl/video_minimum_maximum.cpp
+ frontend/maxwell/translate/impl/video_multiply_add.cpp
+ frontend/maxwell/translate/impl/video_set_predicate.cpp
+ frontend/maxwell/translate/impl/vote.cpp
+ frontend/maxwell/translate/impl/warp_shuffle.cpp
+ frontend/maxwell/translate/translate.cpp
+ frontend/maxwell/translate/translate.h
+ frontend/maxwell/translate_program.cpp
+ frontend/maxwell/translate_program.h
+ host_translate_info.h
+ ir_opt/collect_shader_info_pass.cpp
+ ir_opt/constant_propagation_pass.cpp
+ ir_opt/dead_code_elimination_pass.cpp
+ ir_opt/dual_vertex_pass.cpp
+ ir_opt/global_memory_to_storage_buffer_pass.cpp
+ ir_opt/identity_removal_pass.cpp
+ ir_opt/lower_fp16_to_fp32.cpp
+ ir_opt/lower_int64_to_int32.cpp
+ ir_opt/passes.h
+ ir_opt/ssa_rewrite_pass.cpp
+ ir_opt/texture_pass.cpp
+ ir_opt/verification_pass.cpp
+ object_pool.h
+ profile.h
+ program_header.h
+ runtime_info.h
+ shader_info.h
+ varying_state.h
+)
+
+target_link_libraries(shader_recompiler PUBLIC common fmt::fmt sirit)
+
+if (MSVC)
+ target_compile_options(shader_recompiler PRIVATE
+ /W4
+ /WX
+ /we4018 # 'expression' : signed/unsigned mismatch
+ /we4244 # 'argument' : conversion from 'type1' to 'type2', possible loss of data (floating-point)
+ /we4245 # 'conversion' : conversion from 'type1' to 'type2', signed/unsigned mismatch
+ /we4254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data
+ /we4267 # 'var' : conversion from 'size_t' to 'type', possible loss of data
+ /we4305 # 'context' : truncation from 'type1' to 'type2'
+ /we4800 # Implicit conversion from 'type' to bool. Possible information loss
+ /we4826 # Conversion from 'type1' to 'type2' is sign-extended. This may cause unexpected runtime behavior.
+ )
+else()
+ target_compile_options(shader_recompiler PRIVATE
+ -Werror
+ -Werror=conversion
+ -Werror=ignored-qualifiers
+ -Werror=implicit-fallthrough
+ -Werror=shadow
+ -Werror=sign-compare
+ $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter>
+ $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable>
+ -Werror=unused-variable
+
+ # Bracket depth determines maximum size of a fold expression in Clang since 9c9974c3ccb6.
+ # And this in turn limits the size of a std::array.
+ $<$<CXX_COMPILER_ID:Clang>:-fbracket-depth=1024>
+ )
+endif()
+
+create_target_directory_groups(shader_recompiler)
diff --git a/src/shader_recompiler/backend/bindings.h b/src/shader_recompiler/backend/bindings.h
new file mode 100644
index 000000000..35503000c
--- /dev/null
+++ b/src/shader_recompiler/backend/bindings.h
@@ -0,0 +1,19 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Shader::Backend {
+
+struct Bindings {
+ u32 unified{};
+ u32 uniform_buffer{};
+ u32 storage_buffer{};
+ u32 texture{};
+ u32 image{};
+};
+
+} // namespace Shader::Backend
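
Bindings is a set of running counters rather than a fixed table: a backend reads the current value of a field when it assigns a resource its host binding slot, then advances the field by the descriptor's element count so arrays reserve a contiguous range. A rough sketch of that pattern follows; the descriptor type and helper function are invented for illustration and are not part of this commit.

    // Illustrative only: the counter pattern used with Shader::Backend::Bindings.
    // "TexDesc" and "AssignTextureSlots" are hypothetical names.
    #include <cstdint>
    #include <span>
    #include <vector>
    #include "shader_recompiler/backend/bindings.h"

    struct TexDesc {
        std::uint32_t count; // number of array elements; a plain texture uses 1
    };

    void AssignTextureSlots(Shader::Backend::Bindings& bindings, std::span<const TexDesc> descs,
                            std::vector<std::uint32_t>& first_slots) {
        for (const TexDesc& desc : descs) {
            first_slots.push_back(bindings.texture); // slot of this descriptor's first element
            bindings.texture += desc.count;          // arrays consume 'count' consecutive slots
        }
    }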
diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp
new file mode 100644
index 000000000..069c019ad
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_context.cpp
@@ -0,0 +1,154 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/bindings.h"
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/profile.h"
+#include "shader_recompiler/runtime_info.h"
+
+namespace Shader::Backend::GLASM {
+namespace {
+std::string_view InterpDecorator(Interpolation interp) {
+ switch (interp) {
+ case Interpolation::Smooth:
+ return "";
+ case Interpolation::Flat:
+ return "FLAT ";
+ case Interpolation::NoPerspective:
+ return "NOPERSPECTIVE ";
+ }
+ throw InvalidArgument("Invalid interpolation {}", interp);
+}
+
+bool IsInputArray(Stage stage) {
+ return stage == Stage::Geometry || stage == Stage::TessellationControl ||
+ stage == Stage::TessellationEval;
+}
+} // Anonymous namespace
+
+EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,
+ const RuntimeInfo& runtime_info_)
+ : info{program.info}, profile{profile_}, runtime_info{runtime_info_} {
+ // FIXME: Temporary partial implementation
+ u32 cbuf_index{};
+ for (const auto& desc : info.constant_buffer_descriptors) {
+ if (desc.count != 1) {
+ throw NotImplementedException("Constant buffer descriptor array");
+ }
+ Add("CBUFFER c{}[]={{program.buffer[{}]}};", desc.index, cbuf_index);
+ ++cbuf_index;
+ }
+ u32 ssbo_index{};
+ for (const auto& desc : info.storage_buffers_descriptors) {
+ if (desc.count != 1) {
+ throw NotImplementedException("Storage buffer descriptor array");
+ }
+ if (runtime_info.glasm_use_storage_buffers) {
+ Add("STORAGE ssbo{}[]={{program.storage[{}]}};", ssbo_index, bindings.storage_buffer);
+ ++bindings.storage_buffer;
+ ++ssbo_index;
+ }
+ }
+ if (!runtime_info.glasm_use_storage_buffers) {
+ if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) {
+ Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1);
+ }
+ }
+ stage = program.stage;
+ switch (program.stage) {
+ case Stage::VertexA:
+ case Stage::VertexB:
+ stage_name = "vertex";
+ attrib_name = "vertex";
+ break;
+ case Stage::TessellationControl:
+ case Stage::TessellationEval:
+ stage_name = "primitive";
+ attrib_name = "primitive";
+ break;
+ case Stage::Geometry:
+ stage_name = "primitive";
+ attrib_name = "vertex";
+ break;
+ case Stage::Fragment:
+ stage_name = "fragment";
+ attrib_name = "fragment";
+ break;
+ case Stage::Compute:
+ stage_name = "invocation";
+ break;
+ }
+ const std::string_view attr_stage{stage == Stage::Fragment ? "fragment" : "vertex"};
+ const VaryingState loads{info.loads.mask | info.passthrough.mask};
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ if (loads.Generic(index)) {
+ Add("{}ATTRIB in_attr{}[]={{{}.attrib[{}..{}]}};",
+ InterpDecorator(info.interpolation[index]), index, attr_stage, index, index);
+ }
+ }
+ if (IsInputArray(stage) && loads.AnyComponent(IR::Attribute::PositionX)) {
+ Add("ATTRIB vertex_position=vertex.position;");
+ }
+ if (info.uses_invocation_id) {
+ Add("ATTRIB primitive_invocation=primitive.invocation;");
+ }
+ if (info.stores_tess_level_outer) {
+ Add("OUTPUT result_patch_tessouter[]={{result.patch.tessouter[0..3]}};");
+ }
+ if (info.stores_tess_level_inner) {
+ Add("OUTPUT result_patch_tessinner[]={{result.patch.tessinner[0..1]}};");
+ }
+ if (info.stores.ClipDistances()) {
+ Add("OUTPUT result_clip[]={{result.clip[0..7]}};");
+ }
+ for (size_t index = 0; index < info.uses_patches.size(); ++index) {
+ if (!info.uses_patches[index]) {
+ continue;
+ }
+ if (stage == Stage::TessellationControl) {
+ Add("OUTPUT result_patch_attrib{}[]={{result.patch.attrib[{}..{}]}};"
+ "ATTRIB primitive_out_patch_attrib{}[]={{primitive.out.patch.attrib[{}..{}]}};",
+ index, index, index, index, index, index);
+ } else {
+ Add("ATTRIB primitive_patch_attrib{}[]={{primitive.patch.attrib[{}..{}]}};", index,
+ index, index);
+ }
+ }
+ if (stage == Stage::Fragment) {
+ Add("OUTPUT frag_color0=result.color;");
+ for (size_t index = 1; index < info.stores_frag_color.size(); ++index) {
+ Add("OUTPUT frag_color{}=result.color[{}];", index, index);
+ }
+ }
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ if (info.stores.Generic(index)) {
+ Add("OUTPUT out_attr{}[]={{result.attrib[{}..{}]}};", index, index, index);
+ }
+ }
+ image_buffer_bindings.reserve(info.image_buffer_descriptors.size());
+ for (const auto& desc : info.image_buffer_descriptors) {
+ image_buffer_bindings.push_back(bindings.image);
+ bindings.image += desc.count;
+ }
+ image_bindings.reserve(info.image_descriptors.size());
+ for (const auto& desc : info.image_descriptors) {
+ image_bindings.push_back(bindings.image);
+ bindings.image += desc.count;
+ }
+ texture_buffer_bindings.reserve(info.texture_buffer_descriptors.size());
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ texture_buffer_bindings.push_back(bindings.texture);
+ bindings.texture += desc.count;
+ }
+ texture_bindings.reserve(info.texture_descriptors.size());
+ for (const auto& desc : info.texture_descriptors) {
+ texture_bindings.push_back(bindings.texture);
+ bindings.texture += desc.count;
+ }
+}
+
+} // namespace Shader::Backend::GLASM
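
To make the declaration strings assembled above concrete: the generic-attribute loop near the top of the constructor expands its format pattern with fmt. The standalone function below reproduces that expansion for one made-up case, a flat-interpolated generic attribute 2 read by a fragment shader; the chosen values are examples only.

    // Standalone illustration of the "{}ATTRIB in_attr{}[]={{{}.attrib[{}..{}]}};" expansion.
    #include <cstddef>
    #include <string>
    #include <string_view>
    #include <fmt/format.h>

    std::string DeclareGenericInput() {
        const std::string_view interp{"FLAT "};        // InterpDecorator(Interpolation::Flat)
        const std::string_view attr_stage{"fragment"}; // fragment shaders read "fragment.attrib"
        const std::size_t index{2};
        return fmt::format("{}ATTRIB in_attr{}[]={{{}.attrib[{}..{}]}};", interp, index,
                           attr_stage, index, index);
        // Produces: FLAT ATTRIB in_attr2[]={fragment.attrib[2..2]};
    }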
diff --git a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h
new file mode 100644
index 000000000..8433e5c00
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_context.h
@@ -0,0 +1,80 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <fmt/format.h>
+
+#include "shader_recompiler/backend/glasm/reg_alloc.h"
+#include "shader_recompiler/stage.h"
+
+namespace Shader {
+struct Info;
+struct Profile;
+struct RuntimeInfo;
+} // namespace Shader
+
+namespace Shader::Backend {
+struct Bindings;
+}
+
+namespace Shader::IR {
+class Inst;
+struct Program;
+} // namespace Shader::IR
+
+namespace Shader::Backend::GLASM {
+
+class EmitContext {
+public:
+ explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,
+ const RuntimeInfo& runtime_info_);
+
+ template <typename... Args>
+ void Add(const char* format_str, IR::Inst& inst, Args&&... args) {
+ code += fmt::format(fmt::runtime(format_str), reg_alloc.Define(inst),
+ std::forward<Args>(args)...);
+ // TODO: Remove this
+ code += '\n';
+ }
+
+ template <typename... Args>
+ void LongAdd(const char* format_str, IR::Inst& inst, Args&&... args) {
+ code += fmt::format(fmt::runtime(format_str), reg_alloc.LongDefine(inst),
+ std::forward<Args>(args)...);
+ // TODO: Remove this
+ code += '\n';
+ }
+
+ template <typename... Args>
+ void Add(const char* format_str, Args&&... args) {
+ code += fmt::format(fmt::runtime(format_str), std::forward<Args>(args)...);
+ // TODO: Remove this
+ code += '\n';
+ }
+
+ std::string code;
+ RegAlloc reg_alloc{};
+ const Info& info;
+ const Profile& profile;
+ const RuntimeInfo& runtime_info;
+
+ std::vector<u32> texture_buffer_bindings;
+ std::vector<u32> image_buffer_bindings;
+ std::vector<u32> texture_bindings;
+ std::vector<u32> image_bindings;
+
+ Stage stage{};
+ std::string_view stage_name = "invalid";
+ std::string_view attrib_name = "invalid";
+
+ u32 num_safety_loop_vars{};
+ bool uses_y_direction{};
+};
+
+} // namespace Shader::Backend::GLASM
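
These Add/LongAdd helpers are the entire emission interface of the GLASM backend: the IR instruction is passed first so that Define()/LongDefine() allocates (or reuses) its destination register, and that register fills the first {} of the format string; the remaining operands are printed through their fmt formatters. A representative emitter would look roughly like the sketch below; the opcode and operand types are chosen for illustration and the signature is not copied from this commit.

    // Sketch of a typical emitter built on EmitContext::Add. The first "{}" in the
    // format string receives the register returned by reg_alloc.Define(inst).
    void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
        ctx.Add("ADD.S {}.x,{},{};", inst, a, b); // dest.x = a + b (signed 32-bit add)
    }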
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
new file mode 100644
index 000000000..a5e8c9b6e
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
@@ -0,0 +1,492 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <string>
+#include <tuple>
+
+#include "common/div_ceil.h"
+#include "common/settings.h"
+#include "shader_recompiler/backend/bindings.h"
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/profile.h"
+#include "shader_recompiler/runtime_info.h"
+
+namespace Shader::Backend::GLASM {
+namespace {
+template <class Func>
+struct FuncTraits {};
+
+template <class ReturnType_, class... Args>
+struct FuncTraits<ReturnType_ (*)(Args...)> {
+ using ReturnType = ReturnType_;
+
+ static constexpr size_t NUM_ARGS = sizeof...(Args);
+
+ template <size_t I>
+ using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
+};
+
+template <typename T>
+struct Identity {
+ Identity(T data_) : data{data_} {}
+
+ T Extract() {
+ return data;
+ }
+
+ T data;
+};
+
+template <bool scalar>
+class RegWrapper {
+public:
+ RegWrapper(EmitContext& ctx, const IR::Value& ir_value) : reg_alloc{ctx.reg_alloc} {
+ const Value value{reg_alloc.Peek(ir_value)};
+ if (value.type == Type::Register) {
+ inst = ir_value.InstRecursive();
+ reg = Register{value};
+ } else {
+ reg = value.type == Type::U64 ? reg_alloc.AllocLongReg() : reg_alloc.AllocReg();
+ }
+ switch (value.type) {
+ case Type::Register:
+ case Type::Void:
+ break;
+ case Type::U32:
+ ctx.Add("MOV.U {}.x,{};", reg, value.imm_u32);
+ break;
+ case Type::U64:
+ ctx.Add("MOV.U64 {}.x,{};", reg, value.imm_u64);
+ break;
+ }
+ }
+
+ auto Extract() {
+ if (inst) {
+ reg_alloc.Unref(*inst);
+ } else {
+ reg_alloc.FreeReg(reg);
+ }
+ return std::conditional_t<scalar, ScalarRegister, Register>{Value{reg}};
+ }
+
+private:
+ RegAlloc& reg_alloc;
+ IR::Inst* inst{};
+ Register reg{};
+};
+
+template <typename ArgType>
+class ValueWrapper {
+public:
+ ValueWrapper(EmitContext& ctx, const IR::Value& ir_value_)
+ : reg_alloc{ctx.reg_alloc}, ir_value{ir_value_}, value{reg_alloc.Peek(ir_value)} {}
+
+ ArgType Extract() {
+ if (!ir_value.IsImmediate()) {
+ reg_alloc.Unref(*ir_value.InstRecursive());
+ }
+ return value;
+ }
+
+private:
+ RegAlloc& reg_alloc;
+ const IR::Value& ir_value;
+ ArgType value;
+};
+
+template <typename ArgType>
+auto Arg(EmitContext& ctx, const IR::Value& arg) {
+ if constexpr (std::is_same_v<ArgType, Register>) {
+ return RegWrapper<false>{ctx, arg};
+ } else if constexpr (std::is_same_v<ArgType, ScalarRegister>) {
+ return RegWrapper<true>{ctx, arg};
+ } else if constexpr (std::is_base_of_v<Value, ArgType>) {
+ return ValueWrapper<ArgType>{ctx, arg};
+ } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) {
+ return Identity<const IR::Value&>{arg};
+ } else if constexpr (std::is_same_v<ArgType, u32>) {
+ return Identity{arg.U32()};
+ } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) {
+ return Identity{arg.Attribute()};
+ } else if constexpr (std::is_same_v<ArgType, IR::Patch>) {
+ return Identity{arg.Patch()};
+ } else if constexpr (std::is_same_v<ArgType, IR::Reg>) {
+ return Identity{arg.Reg()};
+ }
+}
+
+template <auto func, bool is_first_arg_inst>
+struct InvokeCall {
+ template <typename... Args>
+ InvokeCall(EmitContext& ctx, IR::Inst* inst, Args&&... args) {
+ if constexpr (is_first_arg_inst) {
+ func(ctx, *inst, args.Extract()...);
+ } else {
+ func(ctx, args.Extract()...);
+ }
+ }
+};
+
+template <auto func, bool is_first_arg_inst, size_t... I>
+void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) {
+ using Traits = FuncTraits<decltype(func)>;
+ if constexpr (is_first_arg_inst) {
+ InvokeCall<func, is_first_arg_inst>{
+ ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...};
+ } else {
+ InvokeCall<func, is_first_arg_inst>{
+ ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...};
+ }
+}
+
+template <auto func>
+void Invoke(EmitContext& ctx, IR::Inst* inst) {
+ using Traits = FuncTraits<decltype(func)>;
+ static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments");
+ if constexpr (Traits::NUM_ARGS == 1) {
+ Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{});
+ } else {
+ using FirstArgType = typename Traits::template ArgType<1>;
+ static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst&>;
+ using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>;
+ Invoke<func, is_first_arg_inst>(ctx, inst, Indices{});
+ }
+}
+
+void EmitInst(EmitContext& ctx, IR::Inst* inst) {
+ switch (inst->GetOpcode()) {
+#define OPCODE(name, result_type, ...) \
+ case IR::Opcode::name: \
+ return Invoke<&Emit##name>(ctx, inst);
+#include "shader_recompiler/frontend/ir/opcodes.inc"
+#undef OPCODE
+ }
+ throw LogicError("Invalid opcode {}", inst->GetOpcode());
+}
+
+bool IsReference(IR::Inst& inst) {
+ return inst.GetOpcode() == IR::Opcode::Reference;
+}
+
+void PrecolorInst(IR::Inst& phi) {
+ // Insert phi moves before references to avoid overwriting other phis
+ const size_t num_args{phi.NumArgs()};
+ for (size_t i = 0; i < num_args; ++i) {
+ IR::Block& phi_block{*phi.PhiBlock(i)};
+ auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()};
+ IR::IREmitter ir{phi_block, it};
+ const IR::Value arg{phi.Arg(i)};
+ if (arg.IsImmediate()) {
+ ir.PhiMove(phi, arg);
+ } else {
+ ir.PhiMove(phi, IR::Value{&RegAlloc::AliasInst(*arg.Inst())});
+ }
+ }
+ for (size_t i = 0; i < num_args; ++i) {
+ IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi});
+ }
+}
+
+void Precolor(const IR::Program& program) {
+ for (IR::Block* const block : program.blocks) {
+ for (IR::Inst& phi : block->Instructions()) {
+ if (!IR::IsPhi(phi)) {
+ break;
+ }
+ PrecolorInst(phi);
+ }
+ }
+}
+
+void EmitCode(EmitContext& ctx, const IR::Program& program) {
+ const auto eval{
+ [&](const IR::U1& cond) { return ScalarS32{ctx.reg_alloc.Consume(IR::Value{cond})}; }};
+ for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
+ switch (node.type) {
+ case IR::AbstractSyntaxNode::Type::Block:
+ for (IR::Inst& inst : node.data.block->Instructions()) {
+ EmitInst(ctx, &inst);
+ }
+ break;
+ case IR::AbstractSyntaxNode::Type::If:
+ ctx.Add("MOV.S.CC RC,{};"
+ "IF NE.x;",
+ eval(node.data.if_node.cond));
+ break;
+ case IR::AbstractSyntaxNode::Type::EndIf:
+ ctx.Add("ENDIF;");
+ break;
+ case IR::AbstractSyntaxNode::Type::Loop:
+ ctx.Add("REP;");
+ break;
+ case IR::AbstractSyntaxNode::Type::Repeat:
+ if (!Settings::values.disable_shader_loop_safety_checks) {
+ const u32 loop_index{ctx.num_safety_loop_vars++};
+ const u32 vector_index{loop_index / 4};
+ const char component{"xyzw"[loop_index % 4]};
+ ctx.Add("SUB.S.CC loop{}.{},loop{}.{},1;"
+ "BRK(LT.{});",
+ vector_index, component, vector_index, component, component);
+ }
+ if (node.data.repeat.cond.IsImmediate()) {
+ if (node.data.repeat.cond.U1()) {
+ ctx.Add("ENDREP;");
+ } else {
+ ctx.Add("BRK;"
+ "ENDREP;");
+ }
+ } else {
+ ctx.Add("MOV.S.CC RC,{};"
+ "BRK(EQ.x);"
+ "ENDREP;",
+ eval(node.data.repeat.cond));
+ }
+ break;
+ case IR::AbstractSyntaxNode::Type::Break:
+ if (node.data.break_node.cond.IsImmediate()) {
+ if (node.data.break_node.cond.U1()) {
+ ctx.Add("BRK;");
+ }
+ } else {
+ ctx.Add("MOV.S.CC RC,{};"
+ "BRK (NE.x);",
+ eval(node.data.break_node.cond));
+ }
+ break;
+ case IR::AbstractSyntaxNode::Type::Return:
+ case IR::AbstractSyntaxNode::Type::Unreachable:
+ ctx.Add("RET;");
+ break;
+ }
+ }
+ if (!ctx.reg_alloc.IsEmpty()) {
+ LOG_WARNING(Shader_GLASM, "Register leak after generating code");
+ }
+}
+
+void SetupOptions(const IR::Program& program, const Profile& profile,
+ const RuntimeInfo& runtime_info, std::string& header) {
+ const Info& info{program.info};
+ const Stage stage{program.stage};
+
+ // TODO: Track the shared atomic ops
+ header += "OPTION NV_internal;"
+ "OPTION NV_shader_storage_buffer;"
+ "OPTION NV_gpu_program_fp64;";
+ if (info.uses_int64_bit_atomics) {
+ header += "OPTION NV_shader_atomic_int64;";
+ }
+ if (info.uses_atomic_f32_add) {
+ header += "OPTION NV_shader_atomic_float;";
+ }
+ if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) {
+ header += "OPTION NV_shader_atomic_fp16_vector;";
+ }
+ if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote ||
+ info.uses_fswzadd) {
+ header += "OPTION NV_shader_thread_group;";
+ }
+ if (info.uses_subgroup_shuffles) {
+ header += "OPTION NV_shader_thread_shuffle;";
+ }
+ if (info.uses_sparse_residency) {
+ header += "OPTION EXT_sparse_texture2;";
+ }
+ const bool stores_viewport_layer{info.stores[IR::Attribute::ViewportIndex] ||
+ info.stores[IR::Attribute::Layer]};
+ if ((stage != Stage::Geometry && stores_viewport_layer) ||
+ info.stores[IR::Attribute::ViewportMask]) {
+ if (profile.support_viewport_index_layer_non_geometry) {
+ header += "OPTION NV_viewport_array2;";
+ }
+ }
+ if (program.is_geometry_passthrough && profile.support_geometry_shader_passthrough) {
+ header += "OPTION NV_geometry_shader_passthrough;";
+ }
+ if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) {
+ header += "OPTION EXT_shader_image_load_formatted;";
+ }
+ if (profile.support_derivative_control) {
+ header += "OPTION ARB_derivative_control;";
+ }
+ if (stage == Stage::Fragment && runtime_info.force_early_z != 0) {
+ header += "OPTION NV_early_fragment_tests;";
+ }
+ if (stage == Stage::Fragment) {
+ header += "OPTION ARB_draw_buffers;";
+ }
+}
+
+std::string_view StageHeader(Stage stage) {
+ switch (stage) {
+ case Stage::VertexA:
+ case Stage::VertexB:
+ return "!!NVvp5.0\n";
+ case Stage::TessellationControl:
+ return "!!NVtcp5.0\n";
+ case Stage::TessellationEval:
+ return "!!NVtep5.0\n";
+ case Stage::Geometry:
+ return "!!NVgp5.0\n";
+ case Stage::Fragment:
+ return "!!NVfp5.0\n";
+ case Stage::Compute:
+ return "!!NVcp5.0\n";
+ }
+ throw InvalidArgument("Invalid stage {}", stage);
+}
+
+std::string_view InputPrimitive(InputTopology topology) {
+ switch (topology) {
+ case InputTopology::Points:
+ return "POINTS";
+ case InputTopology::Lines:
+ return "LINES";
+ case InputTopology::LinesAdjacency:
+ return "LINESS_ADJACENCY";
+ case InputTopology::Triangles:
+ return "TRIANGLES";
+ case InputTopology::TrianglesAdjacency:
+ return "TRIANGLES_ADJACENCY";
+ }
+ throw InvalidArgument("Invalid input topology {}", topology);
+}
+
+std::string_view OutputPrimitive(OutputTopology topology) {
+ switch (topology) {
+ case OutputTopology::PointList:
+ return "POINTS";
+ case OutputTopology::LineStrip:
+ return "LINE_STRIP";
+ case OutputTopology::TriangleStrip:
+ return "TRIANGLE_STRIP";
+ }
+ throw InvalidArgument("Invalid output topology {}", topology);
+}
+
+std::string_view GetTessMode(TessPrimitive primitive) {
+ switch (primitive) {
+ case TessPrimitive::Triangles:
+ return "TRIANGLES";
+ case TessPrimitive::Quads:
+ return "QUADS";
+ case TessPrimitive::Isolines:
+ return "ISOLINES";
+ }
+ throw InvalidArgument("Invalid tessellation primitive {}", primitive);
+}
+
+std::string_view GetTessSpacing(TessSpacing spacing) {
+ switch (spacing) {
+ case TessSpacing::Equal:
+ return "EQUAL";
+ case TessSpacing::FractionalOdd:
+ return "FRACTIONAL_ODD";
+ case TessSpacing::FractionalEven:
+ return "FRACTIONAL_EVEN";
+ }
+ throw InvalidArgument("Invalid tessellation spacing {}", spacing);
+}
+} // Anonymous namespace
+
+std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program,
+ Bindings& bindings) {
+ EmitContext ctx{program, bindings, profile, runtime_info};
+ Precolor(program);
+ EmitCode(ctx, program);
+ std::string header{StageHeader(program.stage)};
+ SetupOptions(program, profile, runtime_info, header);
+ switch (program.stage) {
+ case Stage::TessellationControl:
+ header += fmt::format("VERTICES_OUT {};", program.invocations);
+ break;
+ case Stage::TessellationEval:
+ header += fmt::format("TESS_MODE {};"
+ "TESS_SPACING {};"
+ "TESS_VERTEX_ORDER {};",
+ GetTessMode(runtime_info.tess_primitive),
+ GetTessSpacing(runtime_info.tess_spacing),
+ runtime_info.tess_clockwise ? "CW" : "CCW");
+ break;
+ case Stage::Geometry:
+ header += fmt::format("PRIMITIVE_IN {};", InputPrimitive(runtime_info.input_topology));
+ if (program.is_geometry_passthrough) {
+ if (profile.support_geometry_shader_passthrough) {
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ if (program.info.passthrough.Generic(index)) {
+ header += fmt::format("PASSTHROUGH result.attrib[{}];", index);
+ }
+ }
+ if (program.info.passthrough.AnyComponent(IR::Attribute::PositionX)) {
+ header += "PASSTHROUGH result.position;";
+ }
+ } else {
+ LOG_WARNING(Shader_GLASM, "Passthrough geometry program used but not supported");
+ }
+ } else {
+ header +=
+ fmt::format("VERTICES_OUT {};"
+ "PRIMITIVE_OUT {};",
+ program.output_vertices, OutputPrimitive(program.output_topology));
+ }
+ break;
+ case Stage::Compute:
+ header += fmt::format("GROUP_SIZE {} {} {};", program.workgroup_size[0],
+ program.workgroup_size[1], program.workgroup_size[2]);
+ break;
+ default:
+ break;
+ }
+ if (program.shared_memory_size > 0) {
+ header += fmt::format("SHARED_MEMORY {};", program.shared_memory_size);
+ header += fmt::format("SHARED shared_mem[]={{program.sharedmem}};");
+ }
+ header += "TEMP ";
+ for (size_t index = 0; index < ctx.reg_alloc.NumUsedRegisters(); ++index) {
+ header += fmt::format("R{},", index);
+ }
+ if (program.local_memory_size > 0) {
+ header += fmt::format("lmem[{}],", program.local_memory_size);
+ }
+ if (program.info.uses_fswzadd) {
+ header += "FSWZA[4],FSWZB[4],";
+ }
+ const u32 num_safety_loop_vectors{Common::DivCeil(ctx.num_safety_loop_vars, 4u)};
+ for (u32 index = 0; index < num_safety_loop_vectors; ++index) {
+ header += fmt::format("loop{},", index);
+ }
+ header += "RC;"
+ "LONG TEMP ";
+ for (size_t index = 0; index < ctx.reg_alloc.NumUsedLongRegisters(); ++index) {
+ header += fmt::format("D{},", index);
+ }
+ header += "DC;";
+ if (program.info.uses_fswzadd) {
+ header += "MOV.F FSWZA[0],-1;"
+ "MOV.F FSWZA[1],1;"
+ "MOV.F FSWZA[2],-1;"
+ "MOV.F FSWZA[3],0;"
+ "MOV.F FSWZB[0],-1;"
+ "MOV.F FSWZB[1],-1;"
+ "MOV.F FSWZB[2],1;"
+ "MOV.F FSWZB[3],-1;";
+ }
+ for (u32 index = 0; index < num_safety_loop_vectors; ++index) {
+ header += fmt::format("MOV.S loop{},{{0x2000,0x2000,0x2000,0x2000}};", index);
+ }
+ if (ctx.uses_y_direction) {
+ header += "PARAM y_direction[1]={state.material.front.ambient};";
+ }
+ ctx.code.insert(0, header);
+ ctx.code += "END";
+ return ctx.code;
+}
+
+} // namespace Shader::Backend::GLASM
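
The dispatch machinery at the top of this file is the part worth spelling out: EmitInst expands opcodes.inc into a switch over IR::Opcode, and Invoke<&EmitName> uses FuncTraits to read the chosen emitter's parameter list at compile time, so every IR::Value argument can be materialized as whatever wrapper type (Register, ScalarRegister, immediate, attribute, and so on) that particular emitter declares. The stripped-down, self-contained sketch below shows the same technique with invented names; it is independent of this codebase.

    // Minimal sketch of the FuncTraits-based dispatch pattern used above.
    #include <cstddef>
    #include <tuple>
    #include <utility>

    template <class Func>
    struct FuncTraits;

    template <class R, class... Args>
    struct FuncTraits<R (*)(Args...)> {
        static constexpr std::size_t NUM_ARGS = sizeof...(Args);
        template <std::size_t I>
        using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
    };

    struct Value { // stand-in for IR::Value: converted on demand to what an emitter asks for
        int raw;
        template <class T>
        T As() const {
            return static_cast<T>(raw);
        }
    };

    void EmitExample(int a, double b) { // an "emitter" with heterogeneous parameter types
        (void)a;
        (void)b;
    }

    template <auto func, std::size_t... I>
    void Invoke(const Value* args, std::index_sequence<I...>) {
        using Traits = FuncTraits<decltype(func)>;
        // Convert each argument to the type of the emitter's I-th parameter, then call it.
        func(args[I].template As<typename Traits::template ArgType<I>>()...);
    }

    void Dispatch(const Value* args) {
        using Traits = FuncTraits<decltype(&EmitExample)>;
        Invoke<&EmitExample>(args, std::make_index_sequence<Traits::NUM_ARGS>{});
    }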
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.h b/src/shader_recompiler/backend/glasm/emit_glasm.h
new file mode 100644
index 000000000..bcb55f062
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.h
@@ -0,0 +1,25 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+
+#include "shader_recompiler/backend/bindings.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/profile.h"
+#include "shader_recompiler/runtime_info.h"
+
+namespace Shader::Backend::GLASM {
+
+[[nodiscard]] std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info,
+ IR::Program& program, Bindings& bindings);
+
+[[nodiscard]] inline std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info,
+ IR::Program& program) {
+ Bindings binding;
+ return EmitGLASM(profile, runtime_info, program, binding);
+}
+
+} // namespace Shader::Backend::GLASM
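
The two-argument overload simply default-constructs a Bindings instance. A caller translating several stages of one pipeline would instead thread a single Bindings object through every EmitGLASM call, so slots assigned while emitting an earlier stage are not reused by a later one. A hypothetical caller, with the surrounding driver-side names invented for illustration:

    // Hypothetical: share one Bindings object across the stages of a pipeline.
    #include <string>
    #include <utility>
    #include "shader_recompiler/backend/glasm/emit_glasm.h"

    std::pair<std::string, std::string> EmitPipelineGLASM(
        const Shader::Profile& profile, const Shader::RuntimeInfo& vertex_info,
        Shader::IR::Program& vertex_program, const Shader::RuntimeInfo& fragment_info,
        Shader::IR::Program& fragment_program) {
        Shader::Backend::Bindings bindings; // shared, so the counters keep advancing
        std::string vertex_code =
            Shader::Backend::GLASM::EmitGLASM(profile, vertex_info, vertex_program, bindings);
        std::string fragment_code =
            Shader::Backend::GLASM::EmitGLASM(profile, fragment_info, fragment_program, bindings);
        return {std::move(vertex_code), std::move(fragment_code)};
    }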
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
new file mode 100644
index 000000000..9201ccd39
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
@@ -0,0 +1,91 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLASM {
+
+static void Alias(IR::Inst& inst, const IR::Value& value) {
+ if (value.IsImmediate()) {
+ return;
+ }
+ IR::Inst& value_inst{RegAlloc::AliasInst(*value.Inst())};
+ value_inst.DestructiveAddUsage(inst.UseCount());
+ value_inst.DestructiveRemoveUsage();
+ inst.SetDefinition(value_inst.Definition<Id>());
+}
+
+void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) {
+ Alias(inst, value);
+}
+
+void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) {
+ // Fake one usage to get a real register out of the condition
+ inst.DestructiveAddUsage(1);
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ const ScalarS32 input{ctx.reg_alloc.Consume(value)};
+ if (ret != input) {
+ ctx.Add("MOV.S {},{};", ret, input);
+ }
+}
+
+void EmitBitCastU16F16(EmitContext&, IR::Inst& inst, const IR::Value& value) {
+ Alias(inst, value);
+}
+
+void EmitBitCastU32F32(EmitContext&, IR::Inst& inst, const IR::Value& value) {
+ Alias(inst, value);
+}
+
+void EmitBitCastU64F64(EmitContext&, IR::Inst& inst, const IR::Value& value) {
+ Alias(inst, value);
+}
+
+void EmitBitCastF16U16(EmitContext&, IR::Inst& inst, const IR::Value& value) {
+ Alias(inst, value);
+}
+
+void EmitBitCastF32U32(EmitContext&, IR::Inst& inst, const IR::Value& value) {
+ Alias(inst, value);
+}
+
+void EmitBitCastF64U64(EmitContext&, IR::Inst& inst, const IR::Value& value) {
+ Alias(inst, value);
+}
+
+void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value) {
+ ctx.LongAdd("PK64.U {}.x,{};", inst, value);
+}
+
+void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value) {
+ ctx.Add("UP64.U {}.xy,{}.x;", inst, value);
+}
+
+void EmitPackFloat2x16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitUnpackFloat2x16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ ctx.Add("PK2H {}.x,{};", inst, value);
+}
+
+void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ ctx.Add("UP2H {}.xy,{}.x;", inst, value);
+}
+
+void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value) {
+ ctx.LongAdd("PK64 {}.x,{};", inst, value);
+}
+
+void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value) {
+ ctx.Add("UP64 {}.xy,{}.x;", inst, value);
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp
new file mode 100644
index 000000000..bff0b7c1c
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp
@@ -0,0 +1,244 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLASM {
+namespace {
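+// Builds a vector register from up to four elements. Immediate elements are packed into a
+// single vector MOV up front; register elements are then moved into their component slots
+// one at a time.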
+template <auto read_imm, char type, typename... Values>
+void CompositeConstruct(EmitContext& ctx, IR::Inst& inst, Values&&... elements) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (std::ranges::any_of(std::array{elements...},
+ [](const IR::Value& value) { return value.IsImmediate(); })) {
+ using Type = std::invoke_result_t<decltype(read_imm), IR::Value>;
+ const std::array<Type, 4> values{(elements.IsImmediate() ? (elements.*read_imm)() : 0)...};
+ ctx.Add("MOV.{} {},{{{},{},{},{}}};", type, ret, fmt::to_string(values[0]),
+ fmt::to_string(values[1]), fmt::to_string(values[2]), fmt::to_string(values[3]));
+ }
+ size_t index{};
+ for (const IR::Value& element : {elements...}) {
+ if (!element.IsImmediate()) {
+ const ScalarU32 value{ctx.reg_alloc.Consume(element)};
+ ctx.Add("MOV.{} {}.{},{};", type, ret, "xyzw"[index], value);
+ }
+ ++index;
+ }
+}
+
+void CompositeExtract(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index, char type) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (ret == composite && index == 0) {
+ // No need to do anything here, the source and destination are the same register
+ return;
+ }
+ ctx.Add("MOV.{} {}.x,{}.{};", type, ret, composite, "xyzw"[index]);
+}
+
+template <typename ObjectType>
+void CompositeInsert(EmitContext& ctx, IR::Inst& inst, Register composite, ObjectType object,
+ u32 index, char type) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ const char swizzle{"xyzw"[index]};
+ if (ret != composite && ret == object) {
+ // The object is aliased with the return value, so we have to use a temporary to insert
+ ctx.Add("MOV.{} RC,{};"
+ "MOV.{} RC.{},{};"
+ "MOV.{} {},RC;",
+ type, composite, type, swizzle, object, type, ret);
+ } else if (ret != composite) {
+        // The input composite is not aliased with the return value, so it has to be copied
+        // beforehand. The insert object is not aliased with the return value either, so there
+        // is no risk of overwriting it
+ ctx.Add("MOV.{} {},{};"
+ "MOV.{} {}.{},{};",
+ type, ret, composite, type, ret, swizzle, object);
+ } else {
+        // The return value is aliased with the composite, so the object can be inserted directly
+ ctx.Add("MOV.{} {}.{},{};", type, ret, swizzle, object);
+ }
+}
+} // Anonymous namespace
+
+void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2) {
+ CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2);
+}
+
+void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2, const IR::Value& e3) {
+ CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2, e3);
+}
+
+void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2, const IR::Value& e3, const IR::Value& e4) {
+ CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2, e3, e4);
+}
+
+void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
+ CompositeExtract(ctx, inst, composite, index, 'U');
+}
+
+void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
+ CompositeExtract(ctx, inst, composite, index, 'U');
+}
+
+void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
+ CompositeExtract(ctx, inst, composite, index, 'U');
+}
+
+void EmitCompositeInsertU32x2([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite,
+ [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeInsertU32x3([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite,
+ [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeInsertU32x4([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite,
+ [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeConstructF16x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1,
+ [[maybe_unused]] Register e2) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeConstructF16x3([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1,
+ [[maybe_unused]] Register e2, [[maybe_unused]] Register e3) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeConstructF16x4([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1,
+ [[maybe_unused]] Register e2, [[maybe_unused]] Register e3,
+ [[maybe_unused]] Register e4) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeExtractF16x2([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeExtractF16x3([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeExtractF16x4([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeInsertF16x2([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
+ [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeInsertF16x3([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
+ [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeInsertF16x4([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
+ [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2) {
+ CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2);
+}
+
+void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2, const IR::Value& e3) {
+ CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2, e3);
+}
+
+void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2, const IR::Value& e3, const IR::Value& e4) {
+ CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2, e3, e4);
+}
+
+void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
+ CompositeExtract(ctx, inst, composite, index, 'F');
+}
+
+void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
+ CompositeExtract(ctx, inst, composite, index, 'F');
+}
+
+void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
+ CompositeExtract(ctx, inst, composite, index, 'F');
+}
+
+void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, Register composite,
+ ScalarF32 object, u32 index) {
+ CompositeInsert(ctx, inst, composite, object, index, 'F');
+}
+
+void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, Register composite,
+ ScalarF32 object, u32 index) {
+ CompositeInsert(ctx, inst, composite, object, index, 'F');
+}
+
+void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, Register composite,
+ ScalarF32 object, u32 index) {
+ CompositeInsert(ctx, inst, composite, object, index, 'F');
+}
+
+void EmitCompositeConstructF64x2([[maybe_unused]] EmitContext& ctx) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeConstructF64x3([[maybe_unused]] EmitContext& ctx) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeConstructF64x4([[maybe_unused]] EmitContext& ctx) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeExtractF64x2([[maybe_unused]] EmitContext& ctx) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeExtractF64x3([[maybe_unused]] EmitContext& ctx) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeExtractF64x4([[maybe_unused]] EmitContext& ctx) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeInsertF64x2([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
+ [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeInsertF64x3([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
+ [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeInsertF64x4([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
+ [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
new file mode 100644
index 000000000..02c9dc6d7
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
@@ -0,0 +1,346 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/profile.h"
+#include "shader_recompiler/shader_info.h"
+
+namespace Shader::Backend::GLASM {
+namespace {
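+// Loads a value of the given size from the constant buffer selected by 'binding'. Only
+// immediate bindings are supported; immediate offsets beyond the 64 KiB constant buffer window
+// return zero to match out-of-bounds reads on hardware.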
+void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset,
+ std::string_view size) {
+ if (!binding.IsImmediate()) {
+ throw NotImplementedException("Indirect constant buffer loading");
+ }
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (offset.type == Type::U32) {
+ // Avoid reading arrays out of bounds, matching hardware's behavior
+ if (offset.imm_u32 >= 0x10'000) {
+ ctx.Add("MOV.S {},0;", ret);
+ return;
+ }
+ }
+ ctx.Add("LDC.{} {},c{}[{}];", size, ret, binding.U32(), offset);
+}
+
+bool IsInputArray(Stage stage) {
+ return stage == Stage::Geometry || stage == Stage::TessellationControl ||
+ stage == Stage::TessellationEval;
+}
+
+std::string VertexIndex(EmitContext& ctx, ScalarU32 vertex) {
+ return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : "";
+}
+
+u32 TexCoordIndex(IR::Attribute attr) {
+ return (static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4;
+}
+} // Anonymous namespace
+
+void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
+ GetCbuf(ctx, inst, binding, offset, "U8");
+}
+
+void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
+ GetCbuf(ctx, inst, binding, offset, "S8");
+}
+
+void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
+ GetCbuf(ctx, inst, binding, offset, "U16");
+}
+
+void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
+ GetCbuf(ctx, inst, binding, offset, "S16");
+}
+
+void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
+ GetCbuf(ctx, inst, binding, offset, "U32");
+}
+
+void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
+ GetCbuf(ctx, inst, binding, offset, "F32");
+}
+
+void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset) {
+ GetCbuf(ctx, inst, binding, offset, "U32X2");
+}
+
+void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex) {
+ const u32 element{static_cast<u32>(attr) % 4};
+ const char swizzle{"xyzw"[element]};
+ if (IR::IsGeneric(attr)) {
+ const u32 index{IR::GenericAttributeIndex(attr)};
+ ctx.Add("MOV.F {}.x,in_attr{}{}[0].{};", inst, index, VertexIndex(ctx, vertex), swizzle);
+ return;
+ }
+ if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) {
+ const u32 index{TexCoordIndex(attr)};
+ ctx.Add("MOV.F {}.x,{}.texcoord[{}].{};", inst, ctx.attrib_name, index, swizzle);
+ return;
+ }
+ switch (attr) {
+ case IR::Attribute::PrimitiveId:
+ ctx.Add("MOV.S {}.x,primitive.id;", inst);
+ break;
+ case IR::Attribute::PositionX:
+ case IR::Attribute::PositionY:
+ case IR::Attribute::PositionZ:
+ case IR::Attribute::PositionW:
+ if (IsInputArray(ctx.stage)) {
+ ctx.Add("MOV.F {}.x,vertex_position{}.{};", inst, VertexIndex(ctx, vertex), swizzle);
+ } else {
+ ctx.Add("MOV.F {}.x,{}.position.{};", inst, ctx.attrib_name, swizzle);
+ }
+ break;
+ case IR::Attribute::ColorFrontDiffuseR:
+ case IR::Attribute::ColorFrontDiffuseG:
+ case IR::Attribute::ColorFrontDiffuseB:
+ case IR::Attribute::ColorFrontDiffuseA:
+ ctx.Add("MOV.F {}.x,{}.color.{};", inst, ctx.attrib_name, swizzle);
+ break;
+ case IR::Attribute::PointSpriteS:
+ case IR::Attribute::PointSpriteT:
+ ctx.Add("MOV.F {}.x,{}.pointcoord.{};", inst, ctx.attrib_name, swizzle);
+ break;
+ case IR::Attribute::TessellationEvaluationPointU:
+ case IR::Attribute::TessellationEvaluationPointV:
+ ctx.Add("MOV.F {}.x,vertex.tesscoord.{};", inst, swizzle);
+ break;
+ case IR::Attribute::InstanceId:
+ ctx.Add("MOV.S {}.x,{}.instance;", inst, ctx.attrib_name);
+ break;
+ case IR::Attribute::VertexId:
+ ctx.Add("MOV.S {}.x,{}.id;", inst, ctx.attrib_name);
+ break;
+ case IR::Attribute::FrontFace:
+ ctx.Add("CMP.S {}.x,{}.facing.x,0,-1;", inst, ctx.attrib_name);
+ break;
+ default:
+ throw NotImplementedException("Get attribute {}", attr);
+ }
+}
+
+void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value,
+ [[maybe_unused]] ScalarU32 vertex) {
+ const u32 element{static_cast<u32>(attr) % 4};
+ const char swizzle{"xyzw"[element]};
+ if (IR::IsGeneric(attr)) {
+ const u32 index{IR::GenericAttributeIndex(attr)};
+ ctx.Add("MOV.F out_attr{}[0].{},{};", index, swizzle, value);
+ return;
+ }
+ if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9R) {
+ const u32 index{TexCoordIndex(attr)};
+ ctx.Add("MOV.F result.texcoord[{}].{},{};", index, swizzle, value);
+ return;
+ }
+ switch (attr) {
+ case IR::Attribute::Layer:
+ if (ctx.stage == Stage::Geometry || ctx.profile.support_viewport_index_layer_non_geometry) {
+ ctx.Add("MOV.F result.layer.x,{};", value);
+ } else {
+ LOG_WARNING(Shader_GLASM,
+ "Layer stored outside of geometry shader not supported by device");
+ }
+ break;
+ case IR::Attribute::ViewportIndex:
+ if (ctx.stage == Stage::Geometry || ctx.profile.support_viewport_index_layer_non_geometry) {
+ ctx.Add("MOV.F result.viewport.x,{};", value);
+ } else {
+ LOG_WARNING(Shader_GLASM,
+ "Viewport stored outside of geometry shader not supported by device");
+ }
+ break;
+ case IR::Attribute::ViewportMask:
+ // NV_viewport_array2 is required to access result.viewportmask, regardless of shader stage.
+ if (ctx.profile.support_viewport_index_layer_non_geometry) {
+ ctx.Add("MOV.F result.viewportmask[0].x,{};", value);
+ } else {
+ LOG_WARNING(Shader_GLASM, "Device does not support storing to ViewportMask");
+ }
+ break;
+ case IR::Attribute::PointSize:
+ ctx.Add("MOV.F result.pointsize.x,{};", value);
+ break;
+ case IR::Attribute::PositionX:
+ case IR::Attribute::PositionY:
+ case IR::Attribute::PositionZ:
+ case IR::Attribute::PositionW:
+ ctx.Add("MOV.F result.position.{},{};", swizzle, value);
+ break;
+ case IR::Attribute::ColorFrontDiffuseR:
+ case IR::Attribute::ColorFrontDiffuseG:
+ case IR::Attribute::ColorFrontDiffuseB:
+ case IR::Attribute::ColorFrontDiffuseA:
+ ctx.Add("MOV.F result.color.{},{};", swizzle, value);
+ break;
+ case IR::Attribute::ColorFrontSpecularR:
+ case IR::Attribute::ColorFrontSpecularG:
+ case IR::Attribute::ColorFrontSpecularB:
+ case IR::Attribute::ColorFrontSpecularA:
+ ctx.Add("MOV.F result.color.secondary.{},{};", swizzle, value);
+ break;
+ case IR::Attribute::ColorBackDiffuseR:
+ case IR::Attribute::ColorBackDiffuseG:
+ case IR::Attribute::ColorBackDiffuseB:
+ case IR::Attribute::ColorBackDiffuseA:
+ ctx.Add("MOV.F result.color.back.{},{};", swizzle, value);
+ break;
+ case IR::Attribute::ColorBackSpecularR:
+ case IR::Attribute::ColorBackSpecularG:
+ case IR::Attribute::ColorBackSpecularB:
+ case IR::Attribute::ColorBackSpecularA:
+ ctx.Add("MOV.F result.color.back.secondary.{},{};", swizzle, value);
+ break;
+ case IR::Attribute::FogCoordinate:
+ ctx.Add("MOV.F result.fogcoord.x,{};", value);
+ break;
+ case IR::Attribute::ClipDistance0:
+ case IR::Attribute::ClipDistance1:
+ case IR::Attribute::ClipDistance2:
+ case IR::Attribute::ClipDistance3:
+ case IR::Attribute::ClipDistance4:
+ case IR::Attribute::ClipDistance5:
+ case IR::Attribute::ClipDistance6:
+ case IR::Attribute::ClipDistance7: {
+ const u32 index{static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::ClipDistance0)};
+ ctx.Add("MOV.F result.clip[{}].x,{};", index, value);
+ break;
+ }
+ default:
+ throw NotImplementedException("Set attribute {}", attr);
+ }
+}
+
+void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex) {
+ // RC.x = base_index
+ // RC.y = masked_index
+ // RC.z = compare_index
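+    // 'offset' is treated as a byte offset into attribute memory: >>2 selects the 32-bit
+    // component, &3 the component within a vec4 attribute, and >>4 the vec4 attribute slot
+    // that is compared against below.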
+ ctx.Add("SHR.S RC.x,{},2;"
+ "AND.S RC.y,RC.x,3;"
+ "SHR.S RC.z,{},4;",
+ offset, offset);
+
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ u32 num_endifs{};
+ const auto read{[&](u32 compare_index, const std::array<std::string, 4>& values) {
+ ++num_endifs;
+ ctx.Add("SEQ.S.CC RC.w,RC.z,{};" // compare_index
+ "IF NE.w;"
+ // X
+ "SEQ.S.CC RC.w,RC.y,0;"
+ "IF NE.w;"
+ "MOV {}.x,{};"
+ "ELSE;"
+ // Y
+ "SEQ.S.CC RC.w,RC.y,1;"
+ "IF NE.w;"
+ "MOV {}.x,{};"
+ "ELSE;"
+ // Z
+ "SEQ.S.CC RC.w,RC.y,2;"
+ "IF NE.w;"
+ "MOV {}.x,{};"
+ "ELSE;"
+ // W
+ "MOV {}.x,{};"
+ "ENDIF;"
+ "ENDIF;"
+ "ENDIF;"
+ "ELSE;",
+ compare_index, ret, values[0], ret, values[1], ret, values[2], ret, values[3]);
+ }};
+ const auto read_swizzled{[&](u32 compare_index, std::string_view value) {
+ const std::array values{fmt::format("{}.x", value), fmt::format("{}.y", value),
+ fmt::format("{}.z", value), fmt::format("{}.w", value)};
+ read(compare_index, values);
+ }};
+ if (ctx.info.loads.AnyComponent(IR::Attribute::PositionX)) {
+ const u32 index{static_cast<u32>(IR::Attribute::PositionX)};
+ if (IsInputArray(ctx.stage)) {
+ read_swizzled(index, fmt::format("vertex_position{}", VertexIndex(ctx, vertex)));
+ } else {
+ read_swizzled(index, fmt::format("{}.position", ctx.attrib_name));
+ }
+ }
+ for (u32 index = 0; index < static_cast<u32>(IR::NUM_GENERICS); ++index) {
+ if (!ctx.info.loads.Generic(index)) {
+ continue;
+ }
+ read_swizzled(index, fmt::format("in_attr{}{}[0]", index, VertexIndex(ctx, vertex)));
+ }
+ for (u32 i = 0; i < num_endifs; ++i) {
+ ctx.Add("ENDIF;");
+ }
+}
+
+void EmitSetAttributeIndexed([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset,
+ [[maybe_unused]] ScalarF32 value, [[maybe_unused]] ScalarU32 vertex) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch) {
+ if (!IR::IsGeneric(patch)) {
+ throw NotImplementedException("Non-generic patch load");
+ }
+ const u32 index{IR::GenericPatchIndex(patch)};
+ const u32 element{IR::GenericPatchElement(patch)};
+ const char swizzle{"xyzw"[element]};
+ const std::string_view out{ctx.stage == Stage::TessellationControl ? ".out" : ""};
+ ctx.Add("MOV.F {},primitive{}.patch.attrib[{}].{};", inst, out, index, swizzle);
+}
+
+void EmitSetPatch(EmitContext& ctx, IR::Patch patch, ScalarF32 value) {
+ if (IR::IsGeneric(patch)) {
+ const u32 index{IR::GenericPatchIndex(patch)};
+ const u32 element{IR::GenericPatchElement(patch)};
+ ctx.Add("MOV.F result.patch.attrib[{}].{},{};", index, "xyzw"[element], value);
+ return;
+ }
+ switch (patch) {
+ case IR::Patch::TessellationLodLeft:
+ case IR::Patch::TessellationLodRight:
+ case IR::Patch::TessellationLodTop:
+ case IR::Patch::TessellationLodBottom: {
+ const u32 index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)};
+ ctx.Add("MOV.F result.patch.tessouter[{}].x,{};", index, value);
+ break;
+ }
+ case IR::Patch::TessellationLodInteriorU:
+ ctx.Add("MOV.F result.patch.tessinner[0].x,{};", value);
+ break;
+ case IR::Patch::TessellationLodInteriorV:
+ ctx.Add("MOV.F result.patch.tessinner[1].x,{};", value);
+ break;
+ default:
+ throw NotImplementedException("Patch {}", patch);
+ }
+}
+
+void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, ScalarF32 value) {
+ ctx.Add("MOV.F frag_color{}.{},{};", index, "xyzw"[component], value);
+}
+
+void EmitSetSampleMask(EmitContext& ctx, ScalarS32 value) {
+ ctx.Add("MOV.S result.samplemask.x,{};", value);
+}
+
+void EmitSetFragDepth(EmitContext& ctx, ScalarF32 value) {
+ ctx.Add("MOV.F result.depth.z,{};", value);
+}
+
+void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset) {
+ ctx.Add("MOV.U {},lmem[{}].x;", inst, word_offset);
+}
+
+void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value) {
+ ctx.Add("MOV.U lmem[{}].x,{};", word_offset, value);
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp
new file mode 100644
index 000000000..ccdf1cbc8
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp
@@ -0,0 +1,231 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLASM {
+namespace {
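+// Maps the IR rounding mode to the corresponding GLASM CVT modifier; DontCare emits no
+// modifier and leaves rounding to the implementation default.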
+std::string_view FpRounding(IR::FpRounding fp_rounding) {
+ switch (fp_rounding) {
+ case IR::FpRounding::DontCare:
+ return "";
+ case IR::FpRounding::RN:
+ return ".ROUND";
+ case IR::FpRounding::RZ:
+ return ".TRUNC";
+ case IR::FpRounding::RM:
+ return ".FLR";
+ case IR::FpRounding::RP:
+ return ".CEIL";
+ }
+ throw InvalidArgument("Invalid floating-point rounding {}", fp_rounding);
+}
+
+template <typename InputType>
+void Convert(EmitContext& ctx, IR::Inst& inst, InputType value, std::string_view dest,
+ std::string_view src, bool is_long_result) {
+ const std::string_view fp_rounding{FpRounding(inst.Flags<IR::FpControl>().rounding)};
+ const auto ret{is_long_result ? ctx.reg_alloc.LongDefine(inst) : ctx.reg_alloc.Define(inst)};
+ ctx.Add("CVT.{}.{}{} {}.x,{};", dest, src, fp_rounding, ret, value);
+}
+} // Anonymous namespace
+
+void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "S16", "F16", false);
+}
+
+void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ Convert(ctx, inst, value, "S16", "F32", false);
+}
+
+void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ Convert(ctx, inst, value, "S16", "F64", false);
+}
+
+void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "S32", "F16", false);
+}
+
+void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ Convert(ctx, inst, value, "S32", "F32", false);
+}
+
+void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ Convert(ctx, inst, value, "S32", "F64", false);
+}
+
+void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "S64", "F16", true);
+}
+
+void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ Convert(ctx, inst, value, "S64", "F32", true);
+}
+
+void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ Convert(ctx, inst, value, "S64", "F64", true);
+}
+
+void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "U16", "F16", false);
+}
+
+void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ Convert(ctx, inst, value, "U16", "F32", false);
+}
+
+void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ Convert(ctx, inst, value, "U16", "F64", false);
+}
+
+void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "U32", "F16", false);
+}
+
+void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ Convert(ctx, inst, value, "U32", "F32", false);
+}
+
+void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ Convert(ctx, inst, value, "U32", "F64", false);
+}
+
+void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "U64", "F16", true);
+}
+
+void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ Convert(ctx, inst, value, "U64", "F32", true);
+}
+
+void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ Convert(ctx, inst, value, "U64", "F64", true);
+}
+
+void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) {
+ Convert(ctx, inst, value, "U64", "U32", true);
+}
+
+void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "U32", "U64", false);
+}
+
+void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ Convert(ctx, inst, value, "F16", "F32", false);
+}
+
+void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F32", "F16", false);
+}
+
+void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ Convert(ctx, inst, value, "F32", "F64", false);
+}
+
+void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ Convert(ctx, inst, value, "F64", "F32", true);
+}
+
+void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F16", "S8", false);
+}
+
+void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F16", "S16", false);
+}
+
+void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
+ Convert(ctx, inst, value, "F16", "S32", false);
+}
+
+void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F16", "S64", false);
+}
+
+void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F16", "U8", false);
+}
+
+void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F16", "U16", false);
+}
+
+void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) {
+ Convert(ctx, inst, value, "F16", "U32", false);
+}
+
+void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F16", "U64", false);
+}
+
+void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F32", "S8", false);
+}
+
+void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F32", "S16", false);
+}
+
+void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
+ Convert(ctx, inst, value, "F32", "S32", false);
+}
+
+void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F32", "S64", false);
+}
+
+void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F32", "U8", false);
+}
+
+void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F32", "U16", false);
+}
+
+void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) {
+ Convert(ctx, inst, value, "F32", "U32", false);
+}
+
+void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F32", "U64", false);
+}
+
+void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F64", "S8", true);
+}
+
+void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F64", "S16", true);
+}
+
+void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
+ Convert(ctx, inst, value, "F64", "S32", true);
+}
+
+void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F64", "S64", true);
+}
+
+void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F64", "U8", true);
+}
+
+void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F64", "U16", true);
+}
+
+void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) {
+ Convert(ctx, inst, value, "F64", "U32", true);
+}
+
+void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F64", "U64", true);
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp
new file mode 100644
index 000000000..4ed58619d
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp
@@ -0,0 +1,414 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLASM {
+namespace {
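+// Emits a comparison and patches the result for ordered/unordered semantics: ordered
+// inequalities additionally require both operands to equal themselves (i.e. neither is NaN),
+// while unordered comparisons also report true when either operand is NaN.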
+template <typename InputType>
+void Compare(EmitContext& ctx, IR::Inst& inst, InputType lhs, InputType rhs, std::string_view op,
+ std::string_view type, bool ordered, bool inequality = false) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ ctx.Add("{}.{} RC.x,{},{};", op, type, lhs, rhs);
+ if (ordered && inequality) {
+ ctx.Add("SEQ.{} RC.y,{},{};"
+ "SEQ.{} RC.z,{},{};"
+ "AND.U RC.x,RC.x,RC.y;"
+ "AND.U RC.x,RC.x,RC.z;"
+ "SNE.S {}.x,RC.x,0;",
+ type, lhs, lhs, type, rhs, rhs, ret);
+ } else if (ordered) {
+ ctx.Add("SNE.S {}.x,RC.x,0;", ret);
+ } else {
+ ctx.Add("SNE.{} RC.y,{},{};"
+ "SNE.{} RC.z,{},{};"
+ "OR.U RC.x,RC.x,RC.y;"
+ "OR.U RC.x,RC.x,RC.z;"
+ "SNE.S {}.x,RC.x,0;",
+ type, lhs, lhs, type, rhs, rhs, ret);
+ }
+}
+
+template <typename InputType>
+void Clamp(EmitContext& ctx, Register ret, InputType value, InputType min_value,
+ InputType max_value, std::string_view type) {
+    // Call MAX first so that a NaN input is clamped to min_value instead of propagating
+ ctx.Add("MAX.{} RC.x,{},{};"
+ "MIN.{} {}.x,RC.x,{};",
+ type, min_value, value, type, ret, max_value);
+}
+
+std::string_view Precise(IR::Inst& inst) {
+ const bool precise{inst.Flags<IR::FpControl>().no_contraction};
+ return precise ? ".PREC" : "";
+}
+} // Anonymous namespace
+
+void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("MOV.F {}.x,|{}|;", inst, value);
+}
+
+void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ ctx.LongAdd("MOV.F64 {}.x,|{}|;", inst, value);
+}
+
+void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] Register a, [[maybe_unused]] Register b) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) {
+ ctx.Add("ADD.F{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b);
+}
+
+void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) {
+ ctx.Add("ADD.F64{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b);
+}
+
+void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] Register a, [[maybe_unused]] Register b,
+ [[maybe_unused]] Register c) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b, ScalarF32 c) {
+ ctx.Add("MAD.F{} {}.x,{},{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b, c);
+}
+
+void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b, ScalarF64 c) {
+ ctx.Add("MAD.F64{} {}.x,{},{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b, c);
+}
+
+void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) {
+ ctx.Add("MAX.F {}.x,{},{};", inst, a, b);
+}
+
+void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) {
+ ctx.LongAdd("MAX.F64 {}.x,{},{};", inst, a, b);
+}
+
+void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) {
+ ctx.Add("MIN.F {}.x,{},{};", inst, a, b);
+}
+
+void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) {
+ ctx.LongAdd("MIN.F64 {}.x,{},{};", inst, a, b);
+}
+
+void EmitFPMul16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] Register a, [[maybe_unused]] Register b) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) {
+ ctx.Add("MUL.F{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b);
+}
+
+void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) {
+ ctx.Add("MUL.F64{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b);
+}
+
+void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, ScalarRegister value) {
+ ctx.Add("MOV.F {}.x,-{};", inst, value);
+}
+
+void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, Register value) {
+ ctx.LongAdd("MOV.F64 {}.x,-{};", inst, value);
+}
+
+void EmitFPSin(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("SIN {}.x,{};", inst, value);
+}
+
+void EmitFPCos(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("COS {}.x,{};", inst, value);
+}
+
+void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("EX2 {}.x,{};", inst, value);
+}
+
+void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("LG2 {}.x,{};", inst, value);
+}
+
+void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("RCP {}.x,{};", inst, value);
+}
+
+void EmitFPRecip64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("RSQ {}.x,{};", inst, value);
+}
+
+void EmitFPRecipSqrt64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ ctx.Add("RSQ RC.x,{};RCP {}.x,RC.x;", value, ret);
+}
+
+void EmitFPSaturate16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("MOV.F.SAT {}.x,{};", inst, value);
+}
+
+void EmitFPSaturate64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPClamp16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value,
+ [[maybe_unused]] Register min_value, [[maybe_unused]] Register max_value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value, ScalarF32 min_value,
+ ScalarF32 max_value) {
+ Clamp(ctx, ctx.reg_alloc.Define(inst), value, min_value, max_value, "F");
+}
+
+void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value, ScalarF64 min_value,
+ ScalarF64 max_value) {
+ Clamp(ctx, ctx.reg_alloc.LongDefine(inst), value, min_value, max_value, "F64");
+}
+
+void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("ROUND.F {}.x,{};", inst, value);
+}
+
+void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ ctx.LongAdd("ROUND.F64 {}.x,{};", inst, value);
+}
+
+void EmitFPFloor16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("FLR.F {}.x,{};", inst, value);
+}
+
+void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ ctx.LongAdd("FLR.F64 {}.x,{};", inst, value);
+}
+
+void EmitFPCeil16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("CEIL.F {}.x,{};", inst, value);
+}
+
+void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ ctx.LongAdd("CEIL.F64 {}.x,{};", inst, value);
+}
+
+void EmitFPTrunc16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("TRUNC.F {}.x,{};", inst, value);
+}
+
+void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ ctx.LongAdd("TRUNC.F64 {}.x,{};", inst, value);
+}
+
+void EmitFPOrdEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SEQ", "F", true);
+}
+
+void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SEQ", "F64", true);
+}
+
+void EmitFPUnordEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SEQ", "F", false);
+}
+
+void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SEQ", "F64", false);
+}
+
+void EmitFPOrdNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SNE", "F", true, true);
+}
+
+void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SNE", "F64", true, true);
+}
+
+void EmitFPUnordNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SNE", "F", false, true);
+}
+
+void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SNE", "F64", false, true);
+}
+
+void EmitFPOrdLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SLT", "F", true);
+}
+
+void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SLT", "F64", true);
+}
+
+void EmitFPUnordLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SLT", "F", false);
+}
+
+void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SLT", "F64", false);
+}
+
+void EmitFPOrdGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SGT", "F", true);
+}
+
+void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SGT", "F64", true);
+}
+
+void EmitFPUnordGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SGT", "F", false);
+}
+
+void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SGT", "F64", false);
+}
+
+void EmitFPOrdLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SLE", "F", true);
+}
+
+void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SLE", "F64", true);
+}
+
+void EmitFPUnordLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SLE", "F", false);
+}
+
+void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SLE", "F64", false);
+}
+
+void EmitFPOrdGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SGE", "F", true);
+}
+
+void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SGE", "F64", true);
+}
+
+void EmitFPUnordGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SGE", "F", false);
+}
+
+void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SGE", "F64", false);
+}
+
+void EmitFPIsNan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ Compare(ctx, inst, value, value, "SNE", "F", true, false);
+}
+
+void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ Compare(ctx, inst, value, value, "SNE", "F64", true, false);
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
new file mode 100644
index 000000000..09e3a9b82
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
@@ -0,0 +1,850 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <utility>
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLASM {
+namespace {
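+// RAII helper that owns a scratch register from the register allocator and frees it when it
+// goes out of scope. Move-only, so ownership can be handed between scopes.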
+struct ScopedRegister {
+ ScopedRegister() = default;
+ ScopedRegister(RegAlloc& reg_alloc_) : reg_alloc{&reg_alloc_}, reg{reg_alloc->AllocReg()} {}
+
+ ~ScopedRegister() {
+ if (reg_alloc) {
+ reg_alloc->FreeReg(reg);
+ }
+ }
+
+ ScopedRegister& operator=(ScopedRegister&& rhs) noexcept {
+ if (reg_alloc) {
+ reg_alloc->FreeReg(reg);
+ }
+ reg_alloc = std::exchange(rhs.reg_alloc, nullptr);
+ reg = rhs.reg;
+ return *this;
+ }
+
+ ScopedRegister(ScopedRegister&& rhs) noexcept
+ : reg_alloc{std::exchange(rhs.reg_alloc, nullptr)}, reg{rhs.reg} {}
+
+ ScopedRegister& operator=(const ScopedRegister&) = delete;
+ ScopedRegister(const ScopedRegister&) = delete;
+
+ RegAlloc* reg_alloc{};
+ Register reg;
+};
+
+std::string Texture(EmitContext& ctx, IR::TextureInstInfo info,
+ [[maybe_unused]] const IR::Value& index) {
+ // FIXME: indexed reads
+ if (info.type == TextureType::Buffer) {
+ return fmt::format("texture[{}]", ctx.texture_buffer_bindings.at(info.descriptor_index));
+ } else {
+ return fmt::format("texture[{}]", ctx.texture_bindings.at(info.descriptor_index));
+ }
+}
+
+std::string Image(EmitContext& ctx, IR::TextureInstInfo info,
+ [[maybe_unused]] const IR::Value& index) {
+ // FIXME: indexed reads
+ if (info.type == TextureType::Buffer) {
+ return fmt::format("image[{}]", ctx.image_buffer_bindings.at(info.descriptor_index));
+ } else {
+ return fmt::format("image[{}]", ctx.image_bindings.at(info.descriptor_index));
+ }
+}
+
+std::string_view TextureType(IR::TextureInstInfo info) {
+ if (info.is_depth) {
+ switch (info.type) {
+ case TextureType::Color1D:
+ return "SHADOW1D";
+ case TextureType::ColorArray1D:
+ return "SHADOWARRAY1D";
+ case TextureType::Color2D:
+ return "SHADOW2D";
+ case TextureType::ColorArray2D:
+ return "SHADOWARRAY2D";
+ case TextureType::Color3D:
+ return "SHADOW3D";
+ case TextureType::ColorCube:
+ return "SHADOWCUBE";
+ case TextureType::ColorArrayCube:
+ return "SHADOWARRAYCUBE";
+ case TextureType::Buffer:
+ return "SHADOWBUFFER";
+ }
+ } else {
+ switch (info.type) {
+ case TextureType::Color1D:
+ return "1D";
+ case TextureType::ColorArray1D:
+ return "ARRAY1D";
+ case TextureType::Color2D:
+ return "2D";
+ case TextureType::ColorArray2D:
+ return "ARRAY2D";
+ case TextureType::Color3D:
+ return "3D";
+ case TextureType::ColorCube:
+ return "CUBE";
+ case TextureType::ColorArrayCube:
+ return "ARRAYCUBE";
+ case TextureType::Buffer:
+ return "BUFFER";
+ }
+ }
+ throw InvalidArgument("Invalid texture type {}", info.type.Value());
+}
+
+std::string Offset(EmitContext& ctx, const IR::Value& offset) {
+ if (offset.IsEmpty()) {
+ return "";
+ }
+ return fmt::format(",offset({})", Register{ctx.reg_alloc.Consume(offset)});
+}
+
+std::pair<ScopedRegister, ScopedRegister> AllocOffsetsRegs(EmitContext& ctx,
+ const IR::Value& offset2) {
+ if (offset2.IsEmpty()) {
+ return {};
+ } else {
+ return {ctx.reg_alloc, ctx.reg_alloc};
+ }
+}
+
+void SwizzleOffsets(EmitContext& ctx, Register off_x, Register off_y, const IR::Value& offset1,
+ const IR::Value& offset2) {
+ const Register offsets_a{ctx.reg_alloc.Consume(offset1)};
+ const Register offsets_b{ctx.reg_alloc.Consume(offset2)};
+ // Input swizzle: [XYXY] [XYXY]
+ // Output swizzle: [XXXX] [YYYY]
+ ctx.Add("MOV {}.x,{}.x;"
+ "MOV {}.y,{}.z;"
+ "MOV {}.z,{}.x;"
+ "MOV {}.w,{}.z;"
+ "MOV {}.x,{}.y;"
+ "MOV {}.y,{}.w;"
+ "MOV {}.z,{}.y;"
+ "MOV {}.w,{}.w;",
+ off_x, offsets_a, off_x, offsets_a, off_x, offsets_b, off_x, offsets_b, off_y,
+ offsets_a, off_y, offsets_a, off_y, offsets_b, off_y, offsets_b);
+}
+
+std::string GradOffset(const IR::Value& offset) {
+ if (offset.IsImmediate()) {
+ LOG_WARNING(Shader_GLASM, "Gradient offset is a scalar immediate");
+ return "";
+ }
+ IR::Inst* const vector{offset.InstRecursive()};
+ if (!vector->AreAllArgsImmediates()) {
+ LOG_WARNING(Shader_GLASM, "Gradient offset vector is not immediate");
+ return "";
+ }
+ switch (vector->NumArgs()) {
+ case 1:
+ return fmt::format(",({})", static_cast<s32>(vector->Arg(0).U32()));
+ case 2:
+ return fmt::format(",({},{})", static_cast<s32>(vector->Arg(0).U32()),
+ static_cast<s32>(vector->Arg(1).U32()));
+ default:
+ throw LogicError("Invalid number of gradient offsets {}", vector->NumArgs());
+ }
+}
+
+std::pair<std::string, ScopedRegister> Coord(EmitContext& ctx, const IR::Value& coord) {
+ if (coord.IsImmediate()) {
+ ScopedRegister scoped_reg(ctx.reg_alloc);
+ ctx.Add("MOV.U {}.x,{};", scoped_reg.reg, ScalarU32{ctx.reg_alloc.Consume(coord)});
+ return {fmt::to_string(scoped_reg.reg), std::move(scoped_reg)};
+ }
+ std::string coord_vec{fmt::to_string(Register{ctx.reg_alloc.Consume(coord)})};
+ if (coord.InstRecursive()->HasUses()) {
+ // Move non-dead coords to a separate register, although this should never happen because
+ // vectors are only assembled for immediate texture instructions
+ ctx.Add("MOV.F RC,{};", coord_vec);
+ coord_vec = "RC";
+ }
+ return {std::move(coord_vec), ScopedRegister{}};
+}
+
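+// Writes the sparse residency result: -1 when the access was resident, 0 when the NONRESIDENT
+// condition code is signalled.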
+void StoreSparse(EmitContext& ctx, IR::Inst* sparse_inst) {
+ if (!sparse_inst) {
+ return;
+ }
+ const Register sparse_ret{ctx.reg_alloc.Define(*sparse_inst)};
+ ctx.Add("MOV.S {},-1;"
+ "MOV.S {}(NONRESIDENT),0;",
+ sparse_ret, sparse_ret);
+}
+
+std::string_view FormatStorage(ImageFormat format) {
+ switch (format) {
+ case ImageFormat::Typeless:
+ return "U";
+ case ImageFormat::R8_UINT:
+ return "U8";
+ case ImageFormat::R8_SINT:
+ return "S8";
+ case ImageFormat::R16_UINT:
+ return "U16";
+ case ImageFormat::R16_SINT:
+ return "S16";
+ case ImageFormat::R32_UINT:
+ return "U32";
+ case ImageFormat::R32G32_UINT:
+ return "U32X2";
+ case ImageFormat::R32G32B32A32_UINT:
+ return "U32X4";
+ }
+ throw InvalidArgument("Invalid image format {}", format);
+}
+
+template <typename T>
+void ImageAtomic(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, T value,
+ std::string_view op) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const std::string_view type{TextureType(info)};
+ const std::string image{Image(ctx, info, index)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ ctx.Add("ATOMIM.{} {},{},{},{},{};", op, ret, value, coord, image, type);
+}
+
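+// Fetches the sparse residency pseudo-op attached to this texture instruction, if any, and
+// invalidates it so it is not lowered separately; its result is written by StoreSparse instead.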
+IR::Inst* PrepareSparse(IR::Inst& inst) {
+ const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+ if (sparse_inst) {
+ sparse_inst->Invalidate();
+ }
+ return sparse_inst;
+}
+} // Anonymous namespace
+
+void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, Register bias_lc, const IR::Value& offset) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view lod_clamp_mod{info.has_lod_clamp ? ".LODCLAMP" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string texture{Texture(ctx, info, index)};
+ const std::string offset_vec{Offset(ctx, offset)};
+ const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (info.has_bias) {
+ if (info.type == TextureType::ColorArrayCube) {
+ ctx.Add("TXB.F{}{} {},{},{},{},ARRAYCUBE{};", lod_clamp_mod, sparse_mod, ret, coord_vec,
+ bias_lc, texture, offset_vec);
+ } else {
+ if (info.has_lod_clamp) {
+ ctx.Add("MOV.F {}.w,{}.x;"
+ "TXB.F.LODCLAMP{} {},{},{}.y,{},{}{};",
+ coord_vec, bias_lc, sparse_mod, ret, coord_vec, bias_lc, texture, type,
+ offset_vec);
+ } else {
+ ctx.Add("MOV.F {}.w,{}.x;"
+ "TXB.F{} {},{},{},{}{};",
+ coord_vec, bias_lc, sparse_mod, ret, coord_vec, texture, type, offset_vec);
+ }
+ }
+ } else {
+ if (info.has_lod_clamp && info.type == TextureType::ColorArrayCube) {
+ ctx.Add("TEX.F.LODCLAMP{} {},{},{},{},ARRAYCUBE{};", sparse_mod, ret, coord_vec,
+ bias_lc, texture, offset_vec);
+ } else {
+ ctx.Add("TEX.F{}{} {},{},{},{}{};", lod_clamp_mod, sparse_mod, ret, coord_vec, texture,
+ type, offset_vec);
+ }
+ }
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, ScalarF32 lod, const IR::Value& offset) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string texture{Texture(ctx, info, index)};
+ const std::string offset_vec{Offset(ctx, offset)};
+ const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (info.type == TextureType::ColorArrayCube) {
+ ctx.Add("TXL.F{} {},{},{},{},ARRAYCUBE{};", sparse_mod, ret, coord_vec, lod, texture,
+ offset_vec);
+ } else {
+ ctx.Add("MOV.F {}.w,{};"
+ "TXL.F{} {},{},{},{}{};",
+ coord_vec, lod, sparse_mod, ret, coord_vec, texture, type, offset_vec);
+ }
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& dref,
+ const IR::Value& bias_lc, const IR::Value& offset) {
+    // Allocate the ColorArrayCube staging register early so it does not alias registers
+    // consumed below
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ ScopedRegister staging;
+ if (info.type == TextureType::ColorArrayCube) {
+ staging = ScopedRegister{ctx.reg_alloc};
+ }
+ const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)};
+ const Register bias_lc_vec{ctx.reg_alloc.Consume(bias_lc)};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string texture{Texture(ctx, info, index)};
+ const std::string offset_vec{Offset(ctx, offset)};
+ const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (info.has_bias) {
+ if (info.has_lod_clamp) {
+ switch (info.type) {
+ case TextureType::Color1D:
+ case TextureType::ColorArray1D:
+ case TextureType::Color2D:
+ ctx.Add("MOV.F {}.z,{};"
+ "MOV.F {}.w,{}.x;"
+ "TXB.F.LODCLAMP{} {},{},{}.y,{},{}{};",
+ coord_vec, dref_val, coord_vec, bias_lc_vec, sparse_mod, ret, coord_vec,
+ bias_lc_vec, texture, type, offset_vec);
+ break;
+ case TextureType::ColorArray2D:
+ case TextureType::ColorCube:
+ ctx.Add("MOV.F {}.w,{};"
+ "TXB.F.LODCLAMP{} {},{},{},{},{}{};",
+ coord_vec, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, texture, type,
+ offset_vec);
+ break;
+ default:
+ throw NotImplementedException("Invalid type {} with bias and lod clamp",
+ info.type.Value());
+ }
+ } else {
+ switch (info.type) {
+ case TextureType::Color1D:
+ case TextureType::ColorArray1D:
+ case TextureType::Color2D:
+ ctx.Add("MOV.F {}.z,{};"
+ "MOV.F {}.w,{}.x;"
+ "TXB.F{} {},{},{},{}{};",
+ coord_vec, dref_val, coord_vec, bias_lc_vec, sparse_mod, ret, coord_vec,
+ texture, type, offset_vec);
+ break;
+ case TextureType::ColorArray2D:
+ case TextureType::ColorCube:
+ ctx.Add("MOV.F {}.w,{};"
+ "TXB.F{} {},{},{},{},{}{};",
+ coord_vec, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, texture, type,
+ offset_vec);
+ break;
+ case TextureType::ColorArrayCube:
+ ctx.Add("MOV.F {}.x,{};"
+ "MOV.F {}.y,{}.x;"
+ "TXB.F{} {},{},{},{},{}{};",
+ staging.reg, dref_val, staging.reg, bias_lc_vec, sparse_mod, ret, coord_vec,
+ staging.reg, texture, type, offset_vec);
+ break;
+ default:
+ throw NotImplementedException("Invalid type {}", info.type.Value());
+ }
+ }
+ } else {
+ if (info.has_lod_clamp) {
+ if (info.type != TextureType::ColorArrayCube) {
+ const bool w_swizzle{info.type == TextureType::ColorArray2D ||
+ info.type == TextureType::ColorCube};
+ const char dref_swizzle{w_swizzle ? 'w' : 'z'};
+ ctx.Add("MOV.F {}.{},{};"
+ "TEX.F.LODCLAMP{} {},{},{},{},{}{};",
+ coord_vec, dref_swizzle, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec,
+ texture, type, offset_vec);
+ } else {
+ ctx.Add("MOV.F {}.x,{};"
+ "MOV.F {}.y,{};"
+ "TEX.F.LODCLAMP{} {},{},{},{},{}{};",
+ staging.reg, dref_val, staging.reg, bias_lc_vec, sparse_mod, ret, coord_vec,
+ staging.reg, texture, type, offset_vec);
+ }
+ } else {
+ if (info.type != TextureType::ColorArrayCube) {
+ const bool w_swizzle{info.type == TextureType::ColorArray2D ||
+ info.type == TextureType::ColorCube};
+ const char dref_swizzle{w_swizzle ? 'w' : 'z'};
+ ctx.Add("MOV.F {}.{},{};"
+ "TEX.F{} {},{},{},{}{};",
+ coord_vec, dref_swizzle, dref_val, sparse_mod, ret, coord_vec, texture,
+ type, offset_vec);
+ } else {
+ ctx.Add("TEX.F{} {},{},{},{},{}{};", sparse_mod, ret, coord_vec, dref_val, texture,
+ type, offset_vec);
+ }
+ }
+ }
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& dref,
+ const IR::Value& lod, const IR::Value& offset) {
+    // Allocate the ColorArrayCube staging register early so it does not alias registers
+    // consumed below
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ ScopedRegister staging;
+ if (info.type == TextureType::ColorArrayCube) {
+ staging = ScopedRegister{ctx.reg_alloc};
+ }
+ const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)};
+ const ScalarF32 lod_val{ctx.reg_alloc.Consume(lod)};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string texture{Texture(ctx, info, index)};
+ const std::string offset_vec{Offset(ctx, offset)};
+ const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ switch (info.type) {
+ case TextureType::Color1D:
+ case TextureType::ColorArray1D:
+ case TextureType::Color2D:
+ ctx.Add("MOV.F {}.z,{};"
+ "MOV.F {}.w,{};"
+ "TXL.F{} {},{},{},{}{};",
+ coord_vec, dref_val, coord_vec, lod_val, sparse_mod, ret, coord_vec, texture, type,
+ offset_vec);
+ break;
+ case TextureType::ColorArray2D:
+ case TextureType::ColorCube:
+ ctx.Add("MOV.F {}.w,{};"
+ "TXL.F{} {},{},{},{},{}{};",
+ coord_vec, dref_val, sparse_mod, ret, coord_vec, lod_val, texture, type,
+ offset_vec);
+ break;
+ case TextureType::ColorArrayCube:
+ ctx.Add("MOV.F {}.x,{};"
+ "MOV.F {}.y,{};"
+ "TXL.F{} {},{},{},{},{}{};",
+ staging.reg, dref_val, staging.reg, lod_val, sparse_mod, ret, coord_vec,
+ staging.reg, texture, type, offset_vec);
+ break;
+ default:
+ throw NotImplementedException("Invalid type {}", info.type.Value());
+ }
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2) {
+ // Allocate offsets early so they don't overwrite any consumed register
+ const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)};
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const char comp{"xyzw"[info.gather_component]};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string texture{Texture(ctx, info, index)};
+ const Register coord_vec{ctx.reg_alloc.Consume(coord)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (offset2.IsEmpty()) {
+ const std::string offset_vec{Offset(ctx, offset)};
+ ctx.Add("TXG.F{} {},{},{}.{},{}{};", sparse_mod, ret, coord_vec, texture, comp, type,
+ offset_vec);
+ } else {
+ SwizzleOffsets(ctx, off_x.reg, off_y.reg, offset, offset2);
+ ctx.Add("TXGO.F{} {},{},{},{},{}.{},{};", sparse_mod, ret, coord_vec, off_x.reg, off_y.reg,
+ texture, comp, type);
+ }
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2,
+ const IR::Value& dref) {
+ // FIXME: This instruction is not working as expected
+
+ // Allocate offsets early so they don't overwrite any consumed register
+ const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)};
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string texture{Texture(ctx, info, index)};
+ const Register coord_vec{ctx.reg_alloc.Consume(coord)};
+ const ScalarF32 dref_value{ctx.reg_alloc.Consume(dref)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ std::string args;
+ switch (info.type) {
+ case TextureType::Color2D:
+ ctx.Add("MOV.F {}.z,{};", coord_vec, dref_value);
+ args = fmt::to_string(coord_vec);
+ break;
+ case TextureType::ColorArray2D:
+ case TextureType::ColorCube:
+ ctx.Add("MOV.F {}.w,{};", coord_vec, dref_value);
+ args = fmt::to_string(coord_vec);
+ break;
+ case TextureType::ColorArrayCube:
+ args = fmt::format("{},{}", coord_vec, dref_value);
+ break;
+ default:
+ throw NotImplementedException("Invalid type {}", info.type.Value());
+ }
+ if (offset2.IsEmpty()) {
+ const std::string offset_vec{Offset(ctx, offset)};
+ ctx.Add("TXG.F{} {},{},{},{}{};", sparse_mod, ret, args, texture, type, offset_vec);
+ } else {
+ SwizzleOffsets(ctx, off_x.reg, off_y.reg, offset, offset2);
+ ctx.Add("TXGO.F{} {},{},{},{},{},{};", sparse_mod, ret, args, off_x.reg, off_y.reg, texture,
+ type);
+ }
+ StoreSparse(ctx, sparse_inst);
+}
+
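+// Texel fetch. Except for buffer textures, the LOD (or the sample index for multisampled
+// fetches) is carried in the coordinate's .w component before issuing TXF/TXFMS.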
+void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string texture{Texture(ctx, info, index)};
+ const std::string offset_vec{Offset(ctx, offset)};
+ const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (info.type == TextureType::Buffer) {
+ ctx.Add("TXF.F{} {},{},{},{}{};", sparse_mod, ret, coord_vec, texture, type, offset_vec);
+ } else if (ms.type != Type::Void) {
+ ctx.Add("MOV.S {}.w,{};"
+ "TXFMS.F{} {},{},{},{}{};",
+ coord_vec, ms, sparse_mod, ret, coord_vec, texture, type, offset_vec);
+ } else {
+ ctx.Add("MOV.S {}.w,{};"
+ "TXF.F{} {},{},{},{}{};",
+ coord_vec, lod, sparse_mod, ret, coord_vec, texture, type, offset_vec);
+ }
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ ScalarS32 lod) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const std::string texture{Texture(ctx, info, index)};
+ const std::string_view type{TextureType(info)};
+ ctx.Add("TXQ {},{},{},{};", inst, lod, texture, type);
+}
+
+void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const std::string texture{Texture(ctx, info, index)};
+ const std::string_view type{TextureType(info)};
+ ctx.Add("LOD.F {},{},{},{};", inst, coord, texture, type);
+}
+
+void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& derivatives,
+ const IR::Value& offset, const IR::Value& lod_clamp) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ ScopedRegister dpdx, dpdy;
+ const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp};
+ if (multi_component) {
+ // Allocate this early to avoid aliasing other registers
+ dpdx = ScopedRegister{ctx.reg_alloc};
+ dpdy = ScopedRegister{ctx.reg_alloc};
+ }
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string texture{Texture(ctx, info, index)};
+ const std::string offset_vec{GradOffset(offset)};
+ const Register coord_vec{ctx.reg_alloc.Consume(coord)};
+ const Register derivatives_vec{ctx.reg_alloc.Consume(derivatives)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (multi_component) {
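+        // 'derivatives' packs (dPdx.x, dPdy.x, dPdx.y, dPdy.y); unpack it into the per-axis
+        // registers expected by TXD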
+ ctx.Add("MOV.F {}.x,{}.x;"
+ "MOV.F {}.y,{}.z;"
+ "MOV.F {}.x,{}.y;"
+ "MOV.F {}.y,{}.w;",
+ dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec,
+ dpdy.reg, derivatives_vec);
+ if (info.has_lod_clamp) {
+ const ScalarF32 lod_clamp_value{ctx.reg_alloc.Consume(lod_clamp)};
+ ctx.Add("MOV.F {}.w,{};"
+ "TXD.F.LODCLAMP{} {},{},{},{},{},{}{};",
+ dpdy.reg, lod_clamp_value, sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg,
+ texture, type, offset_vec);
+ } else {
+ ctx.Add("TXD.F{} {},{},{},{},{},{}{};", sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg,
+ texture, type, offset_vec);
+ }
+ } else {
+ ctx.Add("TXD.F{} {},{},{}.x,{}.y,{},{}{};", sparse_mod, ret, coord_vec, derivatives_vec,
+ derivatives_vec, texture, type, offset_vec);
+ }
+ StoreSparse(ctx, sparse_inst);
+}
+
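+// Image loads and stores need an explicit storage format; FormatStorage maps the bound image
+// format to the GLASM suffix used by LOADIM/STOREIM (e.g. U32, U32X2, U32X4).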
+void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view format{FormatStorage(info.image_format)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string image{Image(ctx, info, index)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ ctx.Add("LOADIM.{}{} {},{},{},{};", format, sparse_mod, ret, coord, image, type);
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ Register color) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const std::string_view format{FormatStorage(info.image_format)};
+ const std::string_view type{TextureType(info)};
+ const std::string image{Image(ctx, info, index)};
+ ctx.Add("STOREIM.{} {},{},{},{};", format, image, color, coord, type);
+}
+
+void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "ADD.U32");
+}
+
+void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarS32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "MIN.S32");
+}
+
+void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "MIN.U32");
+}
+
+void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarS32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "MAX.S32");
+}
+
+void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "MAX.U32");
+}
+
+void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "IWRAP.U32");
+}
+
+void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "DWRAP.U32");
+}
+
+void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "AND.U32");
+}
+
+void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "OR.U32");
+}
+
+void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "XOR.U32");
+}
+
+void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ Register coord, ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "EXCH.U32");
+}
+
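+// Bindless and bound image variants are lowered to the indexed Image* instructions before code
+// generation, so none of the following stubs should ever be reached.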
+void EmitBindlessImageSampleImplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageSampleExplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageGather(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageGatherDref(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageFetch(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageQueryDimensions(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageQueryLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageGradient(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageRead(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageWrite(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageSampleImplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageSampleExplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageSampleDrefImplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageSampleDrefExplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageGather(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageGatherDref(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageFetch(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageQueryDimensions(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageQueryLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageGradient(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageRead(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageWrite(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicIAdd32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicSMin32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicUMin32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicSMax32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicUMax32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicInc32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicDec32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicAnd32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicOr32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicXor32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicExchange32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicIAdd32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicSMin32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicUMin32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicSMax32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicUMax32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicInc32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicDec32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicAnd32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicOr32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicXor32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicExchange32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
new file mode 100644
index 000000000..12afda43b
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
@@ -0,0 +1,625 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "shader_recompiler/backend/glasm/reg_alloc.h"
+
+namespace Shader::IR {
+enum class Attribute : u64;
+enum class Patch : u64;
+class Inst;
+class Value;
+} // namespace Shader::IR
+
+namespace Shader::Backend::GLASM {
+
+class EmitContext;
+
+// Microinstruction emitters
+void EmitPhi(EmitContext& ctx, IR::Inst& inst);
+void EmitVoid(EmitContext& ctx);
+void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
+void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
+void EmitReference(EmitContext&, const IR::Value& value);
+void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value);
+void EmitJoin(EmitContext& ctx);
+void EmitDemoteToHelperInvocation(EmitContext& ctx);
+void EmitBarrier(EmitContext& ctx);
+void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
+void EmitDeviceMemoryBarrier(EmitContext& ctx);
+void EmitPrologue(EmitContext& ctx);
+void EmitEpilogue(EmitContext& ctx);
+void EmitEmitVertex(EmitContext& ctx, ScalarS32 stream);
+void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream);
+void EmitGetRegister(EmitContext& ctx);
+void EmitSetRegister(EmitContext& ctx);
+void EmitGetPred(EmitContext& ctx);
+void EmitSetPred(EmitContext& ctx);
+void EmitSetGotoVariable(EmitContext& ctx);
+void EmitGetGotoVariable(EmitContext& ctx);
+void EmitSetIndirectBranchVariable(EmitContext& ctx);
+void EmitGetIndirectBranchVariable(EmitContext& ctx);
+void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
+void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
+void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
+void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
+void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
+void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
+void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
+void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex);
+void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, ScalarU32 vertex);
+void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex);
+void EmitSetAttributeIndexed(EmitContext& ctx, ScalarU32 offset, ScalarF32 value, ScalarU32 vertex);
+void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch);
+void EmitSetPatch(EmitContext& ctx, IR::Patch patch, ScalarF32 value);
+void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, ScalarF32 value);
+void EmitSetSampleMask(EmitContext& ctx, ScalarS32 value);
+void EmitSetFragDepth(EmitContext& ctx, ScalarF32 value);
+void EmitGetZFlag(EmitContext& ctx);
+void EmitGetSFlag(EmitContext& ctx);
+void EmitGetCFlag(EmitContext& ctx);
+void EmitGetOFlag(EmitContext& ctx);
+void EmitSetZFlag(EmitContext& ctx);
+void EmitSetSFlag(EmitContext& ctx);
+void EmitSetCFlag(EmitContext& ctx);
+void EmitSetOFlag(EmitContext& ctx);
+void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst);
+void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst);
+void EmitInvocationId(EmitContext& ctx, IR::Inst& inst);
+void EmitSampleId(EmitContext& ctx, IR::Inst& inst);
+void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst);
+void EmitYDirection(EmitContext& ctx, IR::Inst& inst);
+void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset);
+void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value);
+void EmitUndefU1(EmitContext& ctx, IR::Inst& inst);
+void EmitUndefU8(EmitContext& ctx, IR::Inst& inst);
+void EmitUndefU16(EmitContext& ctx, IR::Inst& inst);
+void EmitUndefU32(EmitContext& ctx, IR::Inst& inst);
+void EmitUndefU64(EmitContext& ctx, IR::Inst& inst);
+void EmitLoadGlobalU8(EmitContext& ctx, IR::Inst& inst, Register address);
+void EmitLoadGlobalS8(EmitContext& ctx, IR::Inst& inst, Register address);
+void EmitLoadGlobalU16(EmitContext& ctx, IR::Inst& inst, Register address);
+void EmitLoadGlobalS16(EmitContext& ctx, IR::Inst& inst, Register address);
+void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, Register address);
+void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, Register address);
+void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, Register address);
+void EmitWriteGlobalU8(EmitContext& ctx, Register address, Register value);
+void EmitWriteGlobalS8(EmitContext& ctx, Register address, Register value);
+void EmitWriteGlobalU16(EmitContext& ctx, Register address, Register value);
+void EmitWriteGlobalS16(EmitContext& ctx, Register address, Register value);
+void EmitWriteGlobal32(EmitContext& ctx, Register address, ScalarU32 value);
+void EmitWriteGlobal64(EmitContext& ctx, Register address, Register value);
+void EmitWriteGlobal128(EmitContext& ctx, Register address, Register value);
+void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset);
+void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset);
+void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset);
+void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset);
+void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset);
+void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset);
+void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset);
+void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ ScalarU32 value);
+void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ ScalarS32 value);
+void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ ScalarU32 value);
+void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ ScalarS32 value);
+void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ ScalarU32 value);
+void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ Register value);
+void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ Register value);
+void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
+void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
+void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
+void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
+void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
+void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
+void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
+void EmitWriteSharedU8(EmitContext& ctx, ScalarU32 offset, ScalarU32 value);
+void EmitWriteSharedU16(EmitContext& ctx, ScalarU32 offset, ScalarU32 value);
+void EmitWriteSharedU32(EmitContext& ctx, ScalarU32 offset, ScalarU32 value);
+void EmitWriteSharedU64(EmitContext& ctx, ScalarU32 offset, Register value);
+void EmitWriteSharedU128(EmitContext& ctx, ScalarU32 offset, Register value);
+void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2);
+void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2, const IR::Value& e3);
+void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2, const IR::Value& e3, const IR::Value& e4);
+void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
+void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
+void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
+void EmitCompositeInsertU32x2(EmitContext& ctx, Register composite, ScalarU32 object, u32 index);
+void EmitCompositeInsertU32x3(EmitContext& ctx, Register composite, ScalarU32 object, u32 index);
+void EmitCompositeInsertU32x4(EmitContext& ctx, Register composite, ScalarU32 object, u32 index);
+void EmitCompositeConstructF16x2(EmitContext& ctx, Register e1, Register e2);
+void EmitCompositeConstructF16x3(EmitContext& ctx, Register e1, Register e2, Register e3);
+void EmitCompositeConstructF16x4(EmitContext& ctx, Register e1, Register e2, Register e3,
+ Register e4);
+void EmitCompositeExtractF16x2(EmitContext& ctx, Register composite, u32 index);
+void EmitCompositeExtractF16x3(EmitContext& ctx, Register composite, u32 index);
+void EmitCompositeExtractF16x4(EmitContext& ctx, Register composite, u32 index);
+void EmitCompositeInsertF16x2(EmitContext& ctx, Register composite, Register object, u32 index);
+void EmitCompositeInsertF16x3(EmitContext& ctx, Register composite, Register object, u32 index);
+void EmitCompositeInsertF16x4(EmitContext& ctx, Register composite, Register object, u32 index);
+void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2);
+void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2, const IR::Value& e3);
+void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2, const IR::Value& e3, const IR::Value& e4);
+void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
+void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
+void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
+void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, Register composite,
+ ScalarF32 object, u32 index);
+void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, Register composite,
+ ScalarF32 object, u32 index);
+void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, Register composite,
+ ScalarF32 object, u32 index);
+void EmitCompositeConstructF64x2(EmitContext& ctx);
+void EmitCompositeConstructF64x3(EmitContext& ctx);
+void EmitCompositeConstructF64x4(EmitContext& ctx);
+void EmitCompositeExtractF64x2(EmitContext& ctx);
+void EmitCompositeExtractF64x3(EmitContext& ctx);
+void EmitCompositeExtractF64x4(EmitContext& ctx);
+void EmitCompositeInsertF64x2(EmitContext& ctx, Register composite, Register object, u32 index);
+void EmitCompositeInsertF64x3(EmitContext& ctx, Register composite, Register object, u32 index);
+void EmitCompositeInsertF64x4(EmitContext& ctx, Register composite, Register object, u32 index);
+void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
+ ScalarS32 false_value);
+void EmitSelectU8(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value);
+void EmitSelectU16(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value);
+void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
+ ScalarS32 false_value);
+void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, Register true_value,
+ Register false_value);
+void EmitSelectF16(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value);
+void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
+ ScalarS32 false_value);
+void EmitSelectF64(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value);
+void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
+void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
+void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
+void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
+void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
+void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
+void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitPackFloat2x16(EmitContext& ctx, Register value);
+void EmitUnpackFloat2x16(EmitContext& ctx, Register value);
+void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitGetZeroFromOp(EmitContext& ctx);
+void EmitGetSignFromOp(EmitContext& ctx);
+void EmitGetCarryFromOp(EmitContext& ctx);
+void EmitGetOverflowFromOp(EmitContext& ctx);
+void EmitGetSparseFromOp(EmitContext& ctx);
+void EmitGetInBoundsFromOp(EmitContext& ctx);
+void EmitFPAbs16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
+void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b);
+void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b);
+void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, Register a, Register b, Register c);
+void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b, ScalarF32 c);
+void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b, ScalarF64 c);
+void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b);
+void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b);
+void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b);
+void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b);
+void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
+void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b);
+void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b);
+void EmitFPNeg16(EmitContext& ctx, Register value);
+void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, ScalarRegister value);
+void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitFPSin(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPCos(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPRecip64(EmitContext& ctx, Register value);
+void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPRecipSqrt64(EmitContext& ctx, Register value);
+void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPSaturate16(EmitContext& ctx, Register value);
+void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPSaturate64(EmitContext& ctx, Register value);
+void EmitFPClamp16(EmitContext& ctx, Register value, Register min_value, Register max_value);
+void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value, ScalarF32 min_value,
+ ScalarF32 max_value);
+void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value, ScalarF64 min_value,
+ ScalarF64 max_value);
+void EmitFPRoundEven16(EmitContext& ctx, Register value);
+void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitFPFloor16(EmitContext& ctx, Register value);
+void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitFPCeil16(EmitContext& ctx, Register value);
+void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitFPTrunc16(EmitContext& ctx, Register value);
+void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitFPOrdEqual16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPUnordEqual16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPOrdNotEqual16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPUnordNotEqual16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPOrdLessThan16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPUnordLessThan16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPOrdGreaterThan16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPUnordGreaterThan16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPOrdLessThanEqual16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPUnordLessThanEqual16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPIsNan16(EmitContext& ctx, Register value);
+void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
+void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
+void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
+void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
+void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift);
+void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, ScalarU32 shift);
+void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift);
+void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base,
+ ScalarU32 shift);
+void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 shift);
+void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base,
+ ScalarS32 shift);
+void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 insert,
+ ScalarS32 offset, ScalarS32 count);
+void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 offset,
+ ScalarS32 count);
+void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset,
+ ScalarU32 count);
+void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
+void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
+void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
+void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
+void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value);
+void EmitSMin32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b);
+void EmitSMax32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b);
+void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value, ScalarS32 min, ScalarS32 max);
+void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 min, ScalarU32 max);
+void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
+void EmitULessThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs);
+void EmitIEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
+void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
+void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs);
+void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
+void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs);
+void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
+void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
+void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs);
+void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value);
+void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarS32 value);
+void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value);
+void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarS32 value);
+void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value);
+void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value);
+void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value);
+void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value);
+void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value);
+void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value);
+void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value);
+void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ Register value);
+void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value);
+void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarS32 value);
+void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value);
+void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarS32 value);
+void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value);
+void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value);
+void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value);
+void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value);
+void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value);
+void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value);
+void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value);
+void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarF32 value);
+void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitGlobalAtomicIAdd32(EmitContext& ctx);
+void EmitGlobalAtomicSMin32(EmitContext& ctx);
+void EmitGlobalAtomicUMin32(EmitContext& ctx);
+void EmitGlobalAtomicSMax32(EmitContext& ctx);
+void EmitGlobalAtomicUMax32(EmitContext& ctx);
+void EmitGlobalAtomicInc32(EmitContext& ctx);
+void EmitGlobalAtomicDec32(EmitContext& ctx);
+void EmitGlobalAtomicAnd32(EmitContext& ctx);
+void EmitGlobalAtomicOr32(EmitContext& ctx);
+void EmitGlobalAtomicXor32(EmitContext& ctx);
+void EmitGlobalAtomicExchange32(EmitContext& ctx);
+void EmitGlobalAtomicIAdd64(EmitContext& ctx);
+void EmitGlobalAtomicSMin64(EmitContext& ctx);
+void EmitGlobalAtomicUMin64(EmitContext& ctx);
+void EmitGlobalAtomicSMax64(EmitContext& ctx);
+void EmitGlobalAtomicUMax64(EmitContext& ctx);
+void EmitGlobalAtomicInc64(EmitContext& ctx);
+void EmitGlobalAtomicDec64(EmitContext& ctx);
+void EmitGlobalAtomicAnd64(EmitContext& ctx);
+void EmitGlobalAtomicOr64(EmitContext& ctx);
+void EmitGlobalAtomicXor64(EmitContext& ctx);
+void EmitGlobalAtomicExchange64(EmitContext& ctx);
+void EmitGlobalAtomicAddF32(EmitContext& ctx);
+void EmitGlobalAtomicAddF16x2(EmitContext& ctx);
+void EmitGlobalAtomicAddF32x2(EmitContext& ctx);
+void EmitGlobalAtomicMinF16x2(EmitContext& ctx);
+void EmitGlobalAtomicMinF32x2(EmitContext& ctx);
+void EmitGlobalAtomicMaxF16x2(EmitContext& ctx);
+void EmitGlobalAtomicMaxF32x2(EmitContext& ctx);
+void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
+void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value);
+void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
+void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value);
+void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
+void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value);
+void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
+void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value);
+void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitBindlessImageSampleImplicitLod(EmitContext&);
+void EmitBindlessImageSampleExplicitLod(EmitContext&);
+void EmitBindlessImageSampleDrefImplicitLod(EmitContext&);
+void EmitBindlessImageSampleDrefExplicitLod(EmitContext&);
+void EmitBindlessImageGather(EmitContext&);
+void EmitBindlessImageGatherDref(EmitContext&);
+void EmitBindlessImageFetch(EmitContext&);
+void EmitBindlessImageQueryDimensions(EmitContext&);
+void EmitBindlessImageQueryLod(EmitContext&);
+void EmitBindlessImageGradient(EmitContext&);
+void EmitBindlessImageRead(EmitContext&);
+void EmitBindlessImageWrite(EmitContext&);
+void EmitBoundImageSampleImplicitLod(EmitContext&);
+void EmitBoundImageSampleExplicitLod(EmitContext&);
+void EmitBoundImageSampleDrefImplicitLod(EmitContext&);
+void EmitBoundImageSampleDrefExplicitLod(EmitContext&);
+void EmitBoundImageGather(EmitContext&);
+void EmitBoundImageGatherDref(EmitContext&);
+void EmitBoundImageFetch(EmitContext&);
+void EmitBoundImageQueryDimensions(EmitContext&);
+void EmitBoundImageQueryLod(EmitContext&);
+void EmitBoundImageGradient(EmitContext&);
+void EmitBoundImageRead(EmitContext&);
+void EmitBoundImageWrite(EmitContext&);
+void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, Register bias_lc, const IR::Value& offset);
+void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, ScalarF32 lod, const IR::Value& offset);
+void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& dref,
+ const IR::Value& bias_lc, const IR::Value& offset);
+void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& dref,
+ const IR::Value& lod, const IR::Value& offset);
+void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2);
+void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2,
+ const IR::Value& dref);
+void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms);
+void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ ScalarS32 lod);
+void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord);
+void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& derivatives,
+ const IR::Value& offset, const IR::Value& lod_clamp);
+void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord);
+void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ Register color);
+void EmitBindlessImageAtomicIAdd32(EmitContext&);
+void EmitBindlessImageAtomicSMin32(EmitContext&);
+void EmitBindlessImageAtomicUMin32(EmitContext&);
+void EmitBindlessImageAtomicSMax32(EmitContext&);
+void EmitBindlessImageAtomicUMax32(EmitContext&);
+void EmitBindlessImageAtomicInc32(EmitContext&);
+void EmitBindlessImageAtomicDec32(EmitContext&);
+void EmitBindlessImageAtomicAnd32(EmitContext&);
+void EmitBindlessImageAtomicOr32(EmitContext&);
+void EmitBindlessImageAtomicXor32(EmitContext&);
+void EmitBindlessImageAtomicExchange32(EmitContext&);
+void EmitBoundImageAtomicIAdd32(EmitContext&);
+void EmitBoundImageAtomicSMin32(EmitContext&);
+void EmitBoundImageAtomicUMin32(EmitContext&);
+void EmitBoundImageAtomicSMax32(EmitContext&);
+void EmitBoundImageAtomicUMax32(EmitContext&);
+void EmitBoundImageAtomicInc32(EmitContext&);
+void EmitBoundImageAtomicDec32(EmitContext&);
+void EmitBoundImageAtomicAnd32(EmitContext&);
+void EmitBoundImageAtomicOr32(EmitContext&);
+void EmitBoundImageAtomicXor32(EmitContext&);
+void EmitBoundImageAtomicExchange32(EmitContext&);
+void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value);
+void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarS32 value);
+void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value);
+void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarS32 value);
+void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value);
+void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value);
+void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value);
+void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value);
+void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value);
+void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value);
+void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ Register coord, ScalarU32 value);
+void EmitLaneId(EmitContext& ctx, IR::Inst& inst);
+void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred);
+void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred);
+void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred);
+void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred);
+void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst);
+void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst);
+void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst);
+void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst);
+void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst);
+void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
+ const IR::Value& clamp, const IR::Value& segmentation_mask);
+void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
+ const IR::Value& clamp, const IR::Value& segmentation_mask);
+void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
+ const IR::Value& clamp, const IR::Value& segmentation_mask);
+void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
+ const IR::Value& clamp, const IR::Value& segmentation_mask);
+void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 op_b,
+ ScalarU32 swizzle);
+void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a);
+void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a);
+void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a);
+void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a);
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
new file mode 100644
index 000000000..f55c26b76
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
@@ -0,0 +1,294 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLASM {
+namespace {
+void BitwiseLogicalOp(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b,
+ std::string_view lop) {
+ const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp);
+ const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp);
+ if (zero) {
+ zero->Invalidate();
+ }
+ if (sign) {
+ sign->Invalidate();
+ }
+ if (zero || sign) {
+ ctx.reg_alloc.InvalidateConditionCodes();
+ }
+ const auto ret{ctx.reg_alloc.Define(inst)};
+ ctx.Add("{}.S {}.x,{},{};", lop, ret, a, b);
+ if (zero) {
+ ctx.Add("SEQ.S {},{},0;", *zero, ret);
+ }
+ if (sign) {
+ ctx.Add("SLT.S {},{},0;", *sign, ret);
+ }
+}
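+// Note: when GetZeroFromOp/GetSignFromOp pseudo-instructions are attached they are folded
+// here, recomputing the flags from the result with SEQ.S/SLT.S against zero rather than
+// relying on hardware condition codes.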
+} // Anonymous namespace
+
+void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ const std::array flags{
+ inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp),
+ inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp),
+ inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp),
+ inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp),
+ };
+ for (IR::Inst* const flag_inst : flags) {
+ if (flag_inst) {
+ flag_inst->Invalidate();
+ }
+ }
+ const bool cc{inst.HasAssociatedPseudoOperation()};
+ const std::string_view cc_mod{cc ? ".CC" : ""};
+ if (cc) {
+ ctx.reg_alloc.InvalidateConditionCodes();
+ }
+ const auto ret{ctx.reg_alloc.Define(inst)};
+ ctx.Add("ADD.S{} {}.x,{},{};", cc_mod, ret, a, b);
+ if (!cc) {
+ return;
+ }
+ static constexpr std::array<std::string_view, 4> masks{"", "SF", "CF", "OF"};
+ for (size_t flag_index = 0; flag_index < flags.size(); ++flag_index) {
+ if (!flags[flag_index]) {
+ continue;
+ }
+ const auto flag_ret{ctx.reg_alloc.Define(*flags[flag_index])};
+ if (flag_index == 0) {
+ ctx.Add("SEQ.S {}.x,{}.x,0;", flag_ret, ret);
+ } else {
+ // We could use conditional execution here, but it's broken on Nvidia's compiler
+ ctx.Add("IF {}.x;"
+ "MOV.S {}.x,-1;"
+ "ELSE;"
+ "MOV.S {}.x,0;"
+ "ENDIF;",
+ masks[flag_index], flag_ret, flag_ret);
+ }
+ }
+}
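+// EmitIAdd32 only requests condition codes (ADD.S.CC) when a flag pseudo-instruction is
+// present; the sign/carry/overflow flags are then materialized as -1/0 through explicit
+// IF SF.x / IF CF.x / IF OF.x blocks to work around the conditional-execution issue above.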
+
+void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b) {
+ ctx.LongAdd("ADD.S64 {}.x,{}.x,{}.x;", inst, a, b);
+}
+
+void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ ctx.Add("SUB.S {}.x,{},{};", inst, a, b);
+}
+
+void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b) {
+ ctx.LongAdd("SUB.S64 {}.x,{}.x,{}.x;", inst, a, b);
+}
+
+void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ ctx.Add("MUL.S {}.x,{},{};", inst, a, b);
+}
+
+void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
+ if (value.type != Type::Register && static_cast<s32>(value.imm_u32) < 0) {
+ ctx.Add("MOV.S {},{};", inst, -static_cast<s32>(value.imm_u32));
+ } else {
+ ctx.Add("MOV.S {},-{};", inst, value);
+ }
+}
+
+void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value) {
+ ctx.LongAdd("MOV.S64 {},-{};", inst, value);
+}
+
+void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
+ ctx.Add("ABS.S {},{};", inst, value);
+}
+
+void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift) {
+ ctx.Add("SHL.U {}.x,{},{};", inst, base, shift);
+}
+
+void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base,
+ ScalarU32 shift) {
+ ctx.LongAdd("SHL.U64 {}.x,{},{};", inst, base, shift);
+}
+
+void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift) {
+ ctx.Add("SHR.U {}.x,{},{};", inst, base, shift);
+}
+
+void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base,
+ ScalarU32 shift) {
+ ctx.LongAdd("SHR.U64 {}.x,{},{};", inst, base, shift);
+}
+
+void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 shift) {
+ ctx.Add("SHR.S {}.x,{},{};", inst, base, shift);
+}
+
+void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base,
+ ScalarS32 shift) {
+ ctx.LongAdd("SHR.S64 {}.x,{},{};", inst, base, shift);
+}
+
+void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ BitwiseLogicalOp(ctx, inst, a, b, "AND");
+}
+
+void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ BitwiseLogicalOp(ctx, inst, a, b, "OR");
+}
+
+void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ BitwiseLogicalOp(ctx, inst, a, b, "XOR");
+}
+
+void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 insert,
+ ScalarS32 offset, ScalarS32 count) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (count.type != Type::Register && offset.type != Type::Register) {
+ ctx.Add("BFI.S {},{{{},{},0,0}},{},{};", ret, count, offset, insert, base);
+ } else {
+ ctx.Add("MOV.S RC.x,{};"
+ "MOV.S RC.y,{};"
+ "BFI.S {},RC,{},{};",
+ count, offset, ret, insert, base);
+ }
+}
+
+void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 offset,
+ ScalarS32 count) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (count.type != Type::Register && offset.type != Type::Register) {
+ ctx.Add("BFE.S {},{{{},{},0,0}},{};", ret, count, offset, base);
+ } else {
+ ctx.Add("MOV.S RC.x,{};"
+ "MOV.S RC.y,{};"
+ "BFE.S {},RC,{};",
+ count, offset, ret, base);
+ }
+}
+
+void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset,
+ ScalarU32 count) {
+ const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp);
+ const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp);
+ if (zero) {
+ zero->Invalidate();
+ }
+ if (sign) {
+ sign->Invalidate();
+ }
+ if (zero || sign) {
+ ctx.reg_alloc.InvalidateConditionCodes();
+ }
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (count.type != Type::Register && offset.type != Type::Register) {
+ ctx.Add("BFE.U {},{{{},{},0,0}},{};", ret, count, offset, base);
+ } else {
+ ctx.Add("MOV.U RC.x,{};"
+ "MOV.U RC.y,{};"
+ "BFE.U {},RC,{};",
+ count, offset, ret, base);
+ }
+ if (zero) {
+ ctx.Add("SEQ.S {},{},0;", *zero, ret);
+ }
+ if (sign) {
+ ctx.Add("SLT.S {},{},0;", *sign, ret);
+ }
+}
+
+void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
+ ctx.Add("BFR {},{};", inst, value);
+}
+
+void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
+ ctx.Add("BTC {},{};", inst, value);
+}
+
+void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
+ ctx.Add("NOT.S {},{};", inst, value);
+}
+
+void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
+ ctx.Add("BTFM.S {},{};", inst, value);
+}
+
+void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) {
+ ctx.Add("BTFM.U {},{};", inst, value);
+}
+
+void EmitSMin32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ ctx.Add("MIN.S {},{},{};", inst, a, b);
+}
+
+void EmitUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) {
+ ctx.Add("MIN.U {},{},{};", inst, a, b);
+}
+
+void EmitSMax32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ ctx.Add("MAX.S {},{},{};", inst, a, b);
+}
+
+void EmitUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) {
+ ctx.Add("MAX.U {},{},{};", inst, a, b);
+}
+
+void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value, ScalarS32 min, ScalarS32 max) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ ctx.Add("MIN.S RC.x,{},{};"
+ "MAX.S {}.x,RC.x,{};",
+ max, value, ret, min);
+}
+
+void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 min, ScalarU32 max) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ ctx.Add("MIN.U RC.x,{},{};"
+ "MAX.U {}.x,RC.x,{};",
+ max, value, ret, min);
+}
+
+void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
+ ctx.Add("SLT.S {}.x,{},{};", inst, lhs, rhs);
+}
+
+void EmitULessThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) {
+ ctx.Add("SLT.U {}.x,{},{};", inst, lhs, rhs);
+}
+
+void EmitIEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
+ ctx.Add("SEQ.S {}.x,{},{};", inst, lhs, rhs);
+}
+
+void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
+ ctx.Add("SLE.S {}.x,{},{};", inst, lhs, rhs);
+}
+
+void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) {
+ ctx.Add("SLE.U {}.x,{},{};", inst, lhs, rhs);
+}
+
+void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
+ ctx.Add("SGT.S {}.x,{},{};", inst, lhs, rhs);
+}
+
+void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) {
+ ctx.Add("SGT.U {}.x,{},{};", inst, lhs, rhs);
+}
+
+void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
+ ctx.Add("SNE.U {}.x,{},{};", inst, lhs, rhs);
+}
+
+void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
+ ctx.Add("SGE.S {}.x,{},{};", inst, lhs, rhs);
+}
+
+void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) {
+ ctx.Add("SGE.U {}.x,{},{};", inst, lhs, rhs);
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
new file mode 100644
index 000000000..af9fac7c1
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
@@ -0,0 +1,568 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/runtime_info.h"
+
+namespace Shader::Backend::GLASM {
+namespace {
+void StorageOp(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ std::string_view then_expr, std::string_view else_expr = {}) {
+    // Operate on a bindless SSBO, calling the expression with bounds checking
+ // address = c[binding].xy
+ // length = c[binding].z
+ const u32 sb_binding{binding.U32()};
+ ctx.Add("PK64.U DC,c[{}];" // pointer = address
+ "CVT.U64.U32 DC.z,{};" // offset = uint64_t(offset)
+ "ADD.U64 DC.x,DC.x,DC.z;" // pointer += offset
+ "SLT.U.CC RC.x,{},c[{}].z;", // cc = offset < length
+ sb_binding, offset, offset, sb_binding);
+ if (else_expr.empty()) {
+ ctx.Add("IF NE.x;{}ENDIF;", then_expr);
+ } else {
+ ctx.Add("IF NE.x;{}ELSE;{}ENDIF;", then_expr, else_expr);
+ }
+}
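+// StorageOp treats c[binding] as {address_lo, address_hi, length}: PK64.U rebuilds the
+// 64-bit base pointer in DC.x, the 32-bit offset is added to it, and then_expr only runs
+// while offset < length (the RC.x condition code), falling back to else_expr otherwise.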
+
+void GlobalStorageOp(EmitContext& ctx, Register address, bool pointer_based, std::string_view expr,
+ std::string_view else_expr = {}) {
+ const size_t num_buffers{ctx.info.storage_buffers_descriptors.size()};
+ for (size_t index = 0; index < num_buffers; ++index) {
+ if (!ctx.info.nvn_buffer_used[index]) {
+ continue;
+ }
+ const auto& ssbo{ctx.info.storage_buffers_descriptors[index]};
+ ctx.Add("LDC.U64 DC.x,c{}[{}];" // ssbo_addr
+ "LDC.U32 RC.x,c{}[{}];" // ssbo_size_u32
+ "CVT.U64.U32 DC.y,RC.x;" // ssbo_size = ssbo_size_u32
+ "ADD.U64 DC.y,DC.y,DC.x;" // ssbo_end = ssbo_addr + ssbo_size
+ "SGE.U64 RC.x,{}.x,DC.x;" // a = input_addr >= ssbo_addr ? -1 : 0
+ "SLT.U64 RC.y,{}.x,DC.y;" // b = input_addr < ssbo_end ? -1 : 0
+ "AND.U.CC RC.x,RC.x,RC.y;" // cond = a && b
+ "IF NE.x;" // if cond
+ "SUB.U64 DC.x,{}.x,DC.x;", // offset = input_addr - ssbo_addr
+ ssbo.cbuf_index, ssbo.cbuf_offset, ssbo.cbuf_index, ssbo.cbuf_offset + 8, address,
+ address, address);
+ if (pointer_based) {
+ ctx.Add("PK64.U DC.y,c[{}];" // host_ssbo = cbuf
+ "ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset
+ "{}"
+ "ELSE;",
+ index, expr);
+ } else {
+ ctx.Add("CVT.U32.U64 RC.x,DC.x;"
+ "{},ssbo{}[RC.x];"
+ "ELSE;",
+ expr, index);
+ }
+ }
+ if (!else_expr.empty()) {
+ ctx.Add("{}", else_expr);
+ }
+ const size_t num_used_buffers{ctx.info.nvn_buffer_used.count()};
+ for (size_t index = 0; index < num_used_buffers; ++index) {
+ ctx.Add("ENDIF;");
+ }
+}
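+// GlobalStorageOp has no binding, so it scans every NVN storage buffer the shader uses
+// and emits a nested IF chain: the buffer whose [ssbo_addr, ssbo_end) range contains the
+// input address handles the access, either through a host pointer (pointer_based) or an
+// ssboN[] element index, with one ENDIF per used buffer at the end.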
+
+template <typename ValueType>
+void Write(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, ValueType value,
+ std::string_view size) {
+ if (ctx.runtime_info.glasm_use_storage_buffers) {
+ ctx.Add("STB.{} {},ssbo{}[{}];", size, value, binding.U32(), offset);
+ } else {
+ StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},DC.x;", size, value));
+ }
+}
+
+void Load(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset,
+ std::string_view size) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (ctx.runtime_info.glasm_use_storage_buffers) {
+ ctx.Add("LDB.{} {},ssbo{}[{}];", size, ret, binding.U32(), offset);
+ } else {
+ StorageOp(ctx, binding, offset, fmt::format("LOAD.{} {},DC.x;", size, ret),
+ fmt::format("MOV.U {},{{0,0,0,0}};", ret));
+ }
+}
+
+template <typename ValueType>
+void GlobalWrite(EmitContext& ctx, Register address, ValueType value, std::string_view size) {
+ if (ctx.runtime_info.glasm_use_storage_buffers) {
+ GlobalStorageOp(ctx, address, false, fmt::format("STB.{} {}", size, value));
+ } else {
+ GlobalStorageOp(ctx, address, true, fmt::format("STORE.{} {},DC.x;", size, value));
+ }
+}
+
+void GlobalLoad(EmitContext& ctx, IR::Inst& inst, Register address, std::string_view size) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (ctx.runtime_info.glasm_use_storage_buffers) {
+ GlobalStorageOp(ctx, address, false, fmt::format("LDB.{} {}", size, ret));
+ } else {
+ GlobalStorageOp(ctx, address, true, fmt::format("LOAD.{} {},DC.x;", size, ret),
+ fmt::format("MOV.S {},0;", ret));
+ }
+}
+
+template <typename ValueType>
+void Atom(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset,
+ ValueType value, std::string_view operation, std::string_view size) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (ctx.runtime_info.glasm_use_storage_buffers) {
+ ctx.Add("ATOMB.{}.{} {},{},ssbo{}[{}];", operation, size, ret, value, binding.U32(),
+ offset);
+ } else {
+ StorageOp(ctx, binding, offset,
+ fmt::format("ATOM.{}.{} {},{},DC.x;", operation, size, ret, value));
+ }
+}
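+// Atomics use the native ATOMB ssbo form when storage buffers are available; otherwise
+// they fall back to the bounds-checked global pointer path through StorageOp with ATOM.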
+} // Anonymous namespace
+
+void EmitLoadGlobalU8(EmitContext& ctx, IR::Inst& inst, Register address) {
+ GlobalLoad(ctx, inst, address, "U8");
+}
+
+void EmitLoadGlobalS8(EmitContext& ctx, IR::Inst& inst, Register address) {
+ GlobalLoad(ctx, inst, address, "S8");
+}
+
+void EmitLoadGlobalU16(EmitContext& ctx, IR::Inst& inst, Register address) {
+ GlobalLoad(ctx, inst, address, "U16");
+}
+
+void EmitLoadGlobalS16(EmitContext& ctx, IR::Inst& inst, Register address) {
+ GlobalLoad(ctx, inst, address, "S16");
+}
+
+void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, Register address) {
+ GlobalLoad(ctx, inst, address, "U32");
+}
+
+void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, Register address) {
+ GlobalLoad(ctx, inst, address, "U32X2");
+}
+
+void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, Register address) {
+ GlobalLoad(ctx, inst, address, "U32X4");
+}
+
+void EmitWriteGlobalU8(EmitContext& ctx, Register address, Register value) {
+ GlobalWrite(ctx, address, value, "U8");
+}
+
+void EmitWriteGlobalS8(EmitContext& ctx, Register address, Register value) {
+ GlobalWrite(ctx, address, value, "S8");
+}
+
+void EmitWriteGlobalU16(EmitContext& ctx, Register address, Register value) {
+ GlobalWrite(ctx, address, value, "U16");
+}
+
+void EmitWriteGlobalS16(EmitContext& ctx, Register address, Register value) {
+ GlobalWrite(ctx, address, value, "S16");
+}
+
+void EmitWriteGlobal32(EmitContext& ctx, Register address, ScalarU32 value) {
+ GlobalWrite(ctx, address, value, "U32");
+}
+
+void EmitWriteGlobal64(EmitContext& ctx, Register address, Register value) {
+ GlobalWrite(ctx, address, value, "U32X2");
+}
+
+void EmitWriteGlobal128(EmitContext& ctx, Register address, Register value) {
+ GlobalWrite(ctx, address, value, "U32X4");
+}
+
+void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset) {
+ Load(ctx, inst, binding, offset, "U8");
+}
+
+void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset) {
+ Load(ctx, inst, binding, offset, "S8");
+}
+
+void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset) {
+ Load(ctx, inst, binding, offset, "U16");
+}
+
+void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset) {
+ Load(ctx, inst, binding, offset, "S16");
+}
+
+void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset) {
+ Load(ctx, inst, binding, offset, "U32");
+}
+
+void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset) {
+ Load(ctx, inst, binding, offset, "U32X2");
+}
+
+void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset) {
+ Load(ctx, inst, binding, offset, "U32X4");
+}
+
+void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ ScalarU32 value) {
+ Write(ctx, binding, offset, value, "U8");
+}
+
+void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ ScalarS32 value) {
+ Write(ctx, binding, offset, value, "S8");
+}
+
+void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ ScalarU32 value) {
+ Write(ctx, binding, offset, value, "U16");
+}
+
+void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ ScalarS32 value) {
+ Write(ctx, binding, offset, value, "S16");
+}
+
+void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ ScalarU32 value) {
+ Write(ctx, binding, offset, value, "U32");
+}
+
+void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ Register value) {
+ Write(ctx, binding, offset, value, "U32X2");
+}
+
+void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ Register value) {
+ Write(ctx, binding, offset, value, "U32X4");
+}
+
+void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value) {
+ ctx.Add("ATOMS.ADD.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarS32 value) {
+ ctx.Add("ATOMS.MIN.S32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value) {
+ ctx.Add("ATOMS.MIN.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarS32 value) {
+ ctx.Add("ATOMS.MAX.S32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value) {
+ ctx.Add("ATOMS.MAX.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value) {
+ ctx.Add("ATOMS.IWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value) {
+ ctx.Add("ATOMS.DWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value) {
+ ctx.Add("ATOMS.AND.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value) {
+ ctx.Add("ATOMS.OR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value) {
+ ctx.Add("ATOMS.XOR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value) {
+ ctx.Add("ATOMS.EXCH.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ Register value) {
+ ctx.LongAdd("ATOMS.EXCH.U64 {}.x,{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value) {
+ Atom(ctx, inst, binding, offset, value, "ADD", "U32");
+}
+
+void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarS32 value) {
+ Atom(ctx, inst, binding, offset, value, "MIN", "S32");
+}
+
+void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value) {
+ Atom(ctx, inst, binding, offset, value, "MIN", "U32");
+}
+
+void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarS32 value) {
+ Atom(ctx, inst, binding, offset, value, "MAX", "S32");
+}
+
+void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value) {
+ Atom(ctx, inst, binding, offset, value, "MAX", "U32");
+}
+
+void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value) {
+ Atom(ctx, inst, binding, offset, value, "IWRAP", "U32");
+}
+
+void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value) {
+ Atom(ctx, inst, binding, offset, value, "DWRAP", "U32");
+}
+
+void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value) {
+ Atom(ctx, inst, binding, offset, value, "AND", "U32");
+}
+
+void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value) {
+ Atom(ctx, inst, binding, offset, value, "OR", "U32");
+}
+
+void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value) {
+ Atom(ctx, inst, binding, offset, value, "XOR", "U32");
+}
+
+void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value) {
+ Atom(ctx, inst, binding, offset, value, "EXCH", "U32");
+}
+
+void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "ADD", "U64");
+}
+
+void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "MIN", "S64");
+}
+
+void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "MIN", "U64");
+}
+
+void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "MAX", "S64");
+}
+
+void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "MAX", "U64");
+}
+
+void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "AND", "U64");
+}
+
+void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "OR", "U64");
+}
+
+void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "XOR", "U64");
+}
+
+void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "EXCH", "U64");
+}
+
+void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarF32 value) {
+ Atom(ctx, inst, binding, offset, value, "ADD", "F32");
+}
+
+void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "ADD", "F16x2");
+}
+
+void EmitStorageAtomicAddF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] const IR::Value& binding,
+ [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "MIN", "F16x2");
+}
+
+void EmitStorageAtomicMinF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] const IR::Value& binding,
+ [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "MAX", "F16x2");
+}
+
+void EmitStorageAtomicMaxF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] const IR::Value& binding,
+ [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicIAdd32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicSMin32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicUMin32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicSMax32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicUMax32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicInc32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicDec32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicAnd32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicOr32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicXor32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicExchange32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicIAdd64(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicSMin64(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicUMin64(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicSMax64(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicUMax64(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicInc64(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicDec64(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicAnd64(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicOr64(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicXor64(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicExchange64(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicAddF32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicAddF16x2(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicAddF32x2(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicMinF16x2(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicMinF32x2(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicMaxF16x2(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicMaxF32x2(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp
new file mode 100644
index 000000000..ff64c6924
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp
@@ -0,0 +1,273 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+#ifdef _MSC_VER
+#pragma warning(disable : 4100)
+#endif
+
+namespace Shader::Backend::GLASM {
+
+#define NotImplemented() throw NotImplementedException("GLASM instruction {}", __LINE__)
+
+static void DefinePhi(EmitContext& ctx, IR::Inst& phi) {
+ switch (phi.Arg(0).Type()) {
+ case IR::Type::U1:
+ case IR::Type::U32:
+ case IR::Type::F32:
+ ctx.reg_alloc.Define(phi);
+ break;
+ case IR::Type::U64:
+ case IR::Type::F64:
+ ctx.reg_alloc.LongDefine(phi);
+ break;
+ default:
+ throw NotImplementedException("Phi node type {}", phi.Type());
+ }
+}
+
+void EmitPhi(EmitContext& ctx, IR::Inst& phi) {
+ const size_t num_args{phi.NumArgs()};
+ for (size_t i = 0; i < num_args; ++i) {
+ ctx.reg_alloc.Consume(phi.Arg(i));
+ }
+ if (!phi.Definition<Id>().is_valid) {
+ // The phi node wasn't forward defined
+ DefinePhi(ctx, phi);
+ }
+}
+
+void EmitVoid(EmitContext&) {}
+
+void EmitReference(EmitContext& ctx, const IR::Value& value) {
+ ctx.reg_alloc.Consume(value);
+}
+
+void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& value) {
+ IR::Inst& phi{RegAlloc::AliasInst(*phi_value.Inst())};
+ if (!phi.Definition<Id>().is_valid) {
+ // The phi node wasn't forward defined
+ DefinePhi(ctx, phi);
+ }
+ const Register phi_reg{ctx.reg_alloc.Consume(IR::Value{&phi})};
+ const Value eval_value{ctx.reg_alloc.Consume(value)};
+
+ if (phi_reg == eval_value) {
+ return;
+ }
+ switch (phi.Flags<IR::Type>()) {
+ case IR::Type::U1:
+ case IR::Type::U32:
+ case IR::Type::F32:
+ ctx.Add("MOV.S {}.x,{};", phi_reg, ScalarS32{eval_value});
+ break;
+ case IR::Type::U64:
+ case IR::Type::F64:
+ ctx.Add("MOV.U64 {}.x,{};", phi_reg, ScalarRegister{eval_value});
+ break;
+ default:
+ throw NotImplementedException("Phi node type {}", phi.Type());
+ }
+}
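+// Phi destinations are defined lazily: whichever of EmitPhi or EmitPhiMove runs first
+// allocates the register, and the MOV is skipped when the incoming value already lives
+// in the phi register.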
+
+void EmitJoin(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitDemoteToHelperInvocation(EmitContext& ctx) {
+ ctx.Add("KIL TR.x;");
+}
+
+void EmitBarrier(EmitContext& ctx) {
+ ctx.Add("BAR;");
+}
+
+void EmitWorkgroupMemoryBarrier(EmitContext& ctx) {
+ ctx.Add("MEMBAR.CTA;");
+}
+
+void EmitDeviceMemoryBarrier(EmitContext& ctx) {
+ ctx.Add("MEMBAR;");
+}
+
+void EmitPrologue(EmitContext& ctx) {
+ // TODO
+}
+
+void EmitEpilogue(EmitContext& ctx) {
+ // TODO
+}
+
+void EmitEmitVertex(EmitContext& ctx, ScalarS32 stream) {
+ if (stream.type == Type::U32 && stream.imm_u32 == 0) {
+ ctx.Add("EMIT;");
+ } else {
+ ctx.Add("EMITS {};", stream);
+ }
+}
+
+void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) {
+ if (!stream.IsImmediate()) {
+ LOG_WARNING(Shader_GLASM, "Stream is not immediate");
+ }
+ ctx.reg_alloc.Consume(stream);
+ ctx.Add("ENDPRIM;");
+}
+
+void EmitGetRegister(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetRegister(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetPred(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetPred(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetGotoVariable(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetGotoVariable(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetIndirectBranchVariable(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetIndirectBranchVariable(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetZFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetSFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetCFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetOFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetZFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetSFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetCFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetOFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.S {},invocation.groupid;", inst);
+}
+
+void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.S {},invocation.localid;", inst);
+}
+
+void EmitInvocationId(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.S {}.x,primitive_invocation.x;", inst);
+}
+
+void EmitSampleId(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.S {}.x,fragment.sampleid.x;", inst);
+}
+
+void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.S {}.x,fragment.helperthread.x;", inst);
+}
+
+void EmitYDirection(EmitContext& ctx, IR::Inst& inst) {
+ ctx.uses_y_direction = true;
+ ctx.Add("MOV.F {}.x,y_direction[0].w;", inst);
+}
+
+void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.S {}.x,0;", inst);
+}
+
+void EmitUndefU8(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.S {}.x,0;", inst);
+}
+
+void EmitUndefU16(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.S {}.x,0;", inst);
+}
+
+void EmitUndefU32(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.S {}.x,0;", inst);
+}
+
+void EmitUndefU64(EmitContext& ctx, IR::Inst& inst) {
+ ctx.LongAdd("MOV.S64 {}.x,0;", inst);
+}
+
+void EmitGetZeroFromOp(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetSignFromOp(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetCarryFromOp(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetOverflowFromOp(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetSparseFromOp(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetInBoundsFromOp(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ ctx.Add("OR.S {},{},{};", inst, a, b);
+}
+
+void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ ctx.Add("AND.S {},{},{};", inst, a, b);
+}
+
+void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ ctx.Add("XOR.S {},{},{};", inst, a, b);
+}
+
+void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
+ ctx.Add("SEQ.S {},{},0;", inst, value);
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp
new file mode 100644
index 000000000..68fff613c
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp
@@ -0,0 +1,67 @@
+
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLASM {
+
+void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
+ ScalarS32 false_value) {
+ ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value);
+}
+
+void EmitSelectU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond,
+ [[maybe_unused]] ScalarS32 true_value, [[maybe_unused]] ScalarS32 false_value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitSelectU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond,
+ [[maybe_unused]] ScalarS32 true_value, [[maybe_unused]] ScalarS32 false_value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
+ ScalarS32 false_value) {
+ ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value);
+}
+
+void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, Register true_value,
+ Register false_value) {
+ ctx.reg_alloc.InvalidateConditionCodes();
+ const Register ret{ctx.reg_alloc.LongDefine(inst)};
+ if (ret == true_value) {
+ ctx.Add("MOV.S.CC RC.x,{};"
+ "MOV.U64 {}.x(EQ.x),{};",
+ cond, ret, false_value);
+ } else if (ret == false_value) {
+ ctx.Add("MOV.S.CC RC.x,{};"
+ "MOV.U64 {}.x(NE.x),{};",
+ cond, ret, true_value);
+ } else {
+ ctx.Add("MOV.S.CC RC.x,{};"
+ "MOV.U64 {}.x,{};"
+ "MOV.U64 {}.x(NE.x),{};",
+ cond, ret, false_value, ret, true_value);
+ }
+}
+
+void EmitSelectF16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond,
+ [[maybe_unused]] Register true_value, [[maybe_unused]] Register false_value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
+ ScalarS32 false_value) {
+ ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value);
+}
+
+void EmitSelectF64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond,
+ [[maybe_unused]] Register true_value, [[maybe_unused]] Register false_value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp
new file mode 100644
index 000000000..c1498f449
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp
@@ -0,0 +1,58 @@
+
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLASM {
+void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
+ ctx.Add("LDS.U8 {},shared_mem[{}];", inst, offset);
+}
+
+void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
+ ctx.Add("LDS.S8 {},shared_mem[{}];", inst, offset);
+}
+
+void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
+ ctx.Add("LDS.U16 {},shared_mem[{}];", inst, offset);
+}
+
+void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
+ ctx.Add("LDS.S16 {},shared_mem[{}];", inst, offset);
+}
+
+void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
+ ctx.Add("LDS.U32 {},shared_mem[{}];", inst, offset);
+}
+
+void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
+ ctx.Add("LDS.U32X2 {},shared_mem[{}];", inst, offset);
+}
+
+void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
+ ctx.Add("LDS.U32X4 {},shared_mem[{}];", inst, offset);
+}
+
+void EmitWriteSharedU8(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) {
+ ctx.Add("STS.U8 {},shared_mem[{}];", value, offset);
+}
+
+void EmitWriteSharedU16(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) {
+ ctx.Add("STS.U16 {},shared_mem[{}];", value, offset);
+}
+
+void EmitWriteSharedU32(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) {
+ ctx.Add("STS.U32 {},shared_mem[{}];", value, offset);
+}
+
+void EmitWriteSharedU64(EmitContext& ctx, ScalarU32 offset, Register value) {
+ ctx.Add("STS.U32X2 {},shared_mem[{}];", value, offset);
+}
+
+void EmitWriteSharedU128(EmitContext& ctx, ScalarU32 offset, Register value) {
+ ctx.Add("STS.U32X4 {},shared_mem[{}];", value, offset);
+}
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp
new file mode 100644
index 000000000..544d475b4
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp
@@ -0,0 +1,150 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/profile.h"
+
+namespace Shader::Backend::GLASM {
+
+void EmitLaneId(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.S {}.x,{}.threadid;", inst, ctx.stage_name);
+}
+
+void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) {
+ ctx.Add("TGALL.S {}.x,{};", inst, pred);
+}
+
+void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) {
+ ctx.Add("TGANY.S {}.x,{};", inst, pred);
+}
+
+void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) {
+ ctx.Add("TGEQ.S {}.x,{};", inst, pred);
+}
+
+void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) {
+ ctx.Add("TGBALLOT {}.x,{};", inst, pred);
+}
+
+void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.U {},{}.threadeqmask;", inst, ctx.stage_name);
+}
+
+void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.U {},{}.threadltmask;", inst, ctx.stage_name);
+}
+
+void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.U {},{}.threadlemask;", inst, ctx.stage_name);
+}
+
+void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.U {},{}.threadgtmask;", inst, ctx.stage_name);
+}
+
+void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.U {},{}.threadgemask;", inst, ctx.stage_name);
+}
+
+static void Shuffle(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
+ const IR::Value& clamp, const IR::Value& segmentation_mask,
+ std::string_view op) {
+ IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)};
+ if (in_bounds) {
+ in_bounds->Invalidate();
+ }
+ std::string mask;
+ if (clamp.IsImmediate() && segmentation_mask.IsImmediate()) {
+ mask = fmt::to_string(clamp.U32() | (segmentation_mask.U32() << 8));
+ } else {
+ mask = "RC";
+ ctx.Add("BFI.U RC.x,{{5,8,0,0}},{},{};",
+ ScalarU32{ctx.reg_alloc.Consume(segmentation_mask)},
+ ScalarU32{ctx.reg_alloc.Consume(clamp)});
+ }
+ const Register value_ret{ctx.reg_alloc.Define(inst)};
+ if (in_bounds) {
+ const Register bounds_ret{ctx.reg_alloc.Define(*in_bounds)};
+ ctx.Add("SHF{}.U {},{},{},{};"
+ "MOV.U {}.x,{}.y;",
+ op, bounds_ret, value, index, mask, value_ret, bounds_ret);
+ } else {
+ ctx.Add("SHF{}.U {},{},{},{};"
+ "MOV.U {}.x,{}.y;",
+ op, value_ret, value, index, mask, value_ret, value_ret);
+ }
+}
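+// The third SHF operand packs the thread-index clamp in the low bits and the segmentation
+// mask shifted left by 8; immediates are folded at compile time (e.g. clamp=31, mask=0x1c
+// encodes as 0x1c1f) while dynamic values are packed at runtime with BFI into RC.x.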
+
+void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
+ const IR::Value& clamp, const IR::Value& segmentation_mask) {
+ Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "IDX");
+}
+
+void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
+ const IR::Value& clamp, const IR::Value& segmentation_mask) {
+ Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "UP");
+}
+
+void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
+ const IR::Value& clamp, const IR::Value& segmentation_mask) {
+ Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "DOWN");
+}
+
+void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
+ const IR::Value& clamp, const IR::Value& segmentation_mask) {
+ Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "XOR");
+}
+
+void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 op_b,
+ ScalarU32 swizzle) {
+ const auto ret{ctx.reg_alloc.Define(inst)};
+ ctx.Add("AND.U RC.z,{}.threadid,3;"
+ "SHL.U RC.z,RC.z,1;"
+ "SHR.U RC.z,{},RC.z;"
+ "AND.U RC.z,RC.z,3;"
+ "MUL.F RC.x,{},FSWZA[RC.z];"
+ "MUL.F RC.y,{},FSWZB[RC.z];"
+ "ADD.F {}.x,RC.x,RC.y;",
+ ctx.stage_name, swizzle, op_a, op_b, ret);
+}
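+// The generated code derives a 2-bit slot from the swizzle immediate and (threadid & 3),
+// then blends the operands as op_a * FSWZA[slot] + op_b * FSWZB[slot], with FSWZA/FSWZB
+// acting as per-slot coefficient tables.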
+
+void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) {
+ if (ctx.profile.support_derivative_control) {
+ ctx.Add("DDX.FINE {}.x,{};", inst, p);
+ } else {
+ LOG_WARNING(Shader_GLASM, "Fine derivatives not supported by device");
+ ctx.Add("DDX {}.x,{};", inst, p);
+ }
+}
+
+void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) {
+ if (ctx.profile.support_derivative_control) {
+ ctx.Add("DDY.FINE {}.x,{};", inst, p);
+ } else {
+ LOG_WARNING(Shader_GLASM, "Fine derivatives not supported by device");
+ ctx.Add("DDY {}.x,{};", inst, p);
+ }
+}
+
+void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) {
+ if (ctx.profile.support_derivative_control) {
+ ctx.Add("DDX.COARSE {}.x,{};", inst, p);
+ } else {
+ LOG_WARNING(Shader_GLASM, "Coarse derivatives not supported by device");
+ ctx.Add("DDX {}.x,{};", inst, p);
+ }
+}
+
+void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) {
+ if (ctx.profile.support_derivative_control) {
+ ctx.Add("DDY.COARSE {}.x,{};", inst, p);
+ } else {
+ LOG_WARNING(Shader_GLASM, "Coarse derivatives not supported by device");
+ ctx.Add("DDY {}.x,{};", inst, p);
+ }
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.cpp b/src/shader_recompiler/backend/glasm/reg_alloc.cpp
new file mode 100644
index 000000000..4c046db6e
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/reg_alloc.cpp
@@ -0,0 +1,186 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string>
+
+#include <fmt/format.h>
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/reg_alloc.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLASM {
+
+Register RegAlloc::Define(IR::Inst& inst) {
+ return Define(inst, false);
+}
+
+Register RegAlloc::LongDefine(IR::Inst& inst) {
+ return Define(inst, true);
+}
+
+Value RegAlloc::Peek(const IR::Value& value) {
+ if (value.IsImmediate()) {
+ return MakeImm(value);
+ } else {
+ return PeekInst(*value.Inst());
+ }
+}
+
+Value RegAlloc::Consume(const IR::Value& value) {
+ if (value.IsImmediate()) {
+ return MakeImm(value);
+ } else {
+ return ConsumeInst(*value.Inst());
+ }
+}
+
+void RegAlloc::Unref(IR::Inst& inst) {
+ IR::Inst& value_inst{AliasInst(inst)};
+ value_inst.DestructiveRemoveUsage();
+ if (!value_inst.HasUses()) {
+ Free(value_inst.Definition<Id>());
+ }
+}
+
+Register RegAlloc::AllocReg() {
+ Register ret;
+ ret.type = Type::Register;
+ ret.id = Alloc(false);
+ return ret;
+}
+
+Register RegAlloc::AllocLongReg() {
+ Register ret;
+ ret.type = Type::Register;
+ ret.id = Alloc(true);
+ return ret;
+}
+
+void RegAlloc::FreeReg(Register reg) {
+ Free(reg.id);
+}
+
+Value RegAlloc::MakeImm(const IR::Value& value) {
+ Value ret;
+ switch (value.Type()) {
+ case IR::Type::Void:
+ ret.type = Type::Void;
+ break;
+ case IR::Type::U1:
+ ret.type = Type::U32;
+ ret.imm_u32 = value.U1() ? 0xffffffff : 0;
+ break;
+ case IR::Type::U32:
+ ret.type = Type::U32;
+ ret.imm_u32 = value.U32();
+ break;
+ case IR::Type::F32:
+ ret.type = Type::U32;
+ ret.imm_u32 = Common::BitCast<u32>(value.F32());
+ break;
+ case IR::Type::U64:
+ ret.type = Type::U64;
+ ret.imm_u64 = value.U64();
+ break;
+ case IR::Type::F64:
+ ret.type = Type::U64;
+ ret.imm_u64 = Common::BitCast<u64>(value.F64());
+ break;
+ default:
+ throw NotImplementedException("Immediate type {}", value.Type());
+ }
+ return ret;
+}
+
+Register RegAlloc::Define(IR::Inst& inst, bool is_long) {
+ if (inst.HasUses()) {
+ inst.SetDefinition<Id>(Alloc(is_long));
+ } else {
+ Id id{};
+ id.is_long.Assign(is_long ? 1 : 0);
+ id.is_null.Assign(1);
+ inst.SetDefinition<Id>(id);
+ }
+ return Register{PeekInst(inst)};
+}
+
+Value RegAlloc::PeekInst(IR::Inst& inst) {
+ Value ret;
+ ret.type = Type::Register;
+ ret.id = inst.Definition<Id>();
+ return ret;
+}
+
+Value RegAlloc::ConsumeInst(IR::Inst& inst) {
+ Unref(inst);
+ return PeekInst(inst);
+}
+
+Id RegAlloc::Alloc(bool is_long) {
+ size_t& num_regs{is_long ? num_used_long_registers : num_used_registers};
+ std::bitset<NUM_REGS>& use{is_long ? long_register_use : register_use};
+ if (num_used_registers + num_used_long_registers < NUM_REGS) {
+ for (size_t reg = 0; reg < NUM_REGS; ++reg) {
+ if (use[reg]) {
+ continue;
+ }
+ num_regs = std::max(num_regs, reg + 1);
+ use[reg] = true;
+ Id ret{};
+ ret.is_valid.Assign(1);
+ ret.is_long.Assign(is_long ? 1 : 0);
+ ret.is_spill.Assign(0);
+ ret.is_condition_code.Assign(0);
+ ret.is_null.Assign(0);
+ ret.index.Assign(static_cast<u32>(reg));
+ return ret;
+ }
+ }
+ throw NotImplementedException("Register spilling");
+}
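+// Allocation is a first-fit scan over a shared pool of NUM_REGS ids; short and long
+// registers are tracked in separate bitsets but counted together, and running out of
+// free slots throws since register spilling is not implemented yet.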
+
+void RegAlloc::Free(Id id) {
+ if (id.is_valid == 0) {
+ throw LogicError("Freeing invalid register");
+ }
+ if (id.is_spill != 0) {
+ throw NotImplementedException("Free spill");
+ }
+ if (id.is_long != 0) {
+ long_register_use[id.index] = false;
+ } else {
+ register_use[id.index] = false;
+ }
+}
+
+/*static*/ bool RegAlloc::IsAliased(const IR::Inst& inst) {
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::Identity:
+ case IR::Opcode::BitCastU16F16:
+ case IR::Opcode::BitCastU32F32:
+ case IR::Opcode::BitCastU64F64:
+ case IR::Opcode::BitCastF16U16:
+ case IR::Opcode::BitCastF32U32:
+ case IR::Opcode::BitCastF64U64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/*static*/ IR::Inst& RegAlloc::AliasInst(IR::Inst& inst) {
+ IR::Inst* it{&inst};
+ while (IsAliased(*it)) {
+ const IR::Value arg{it->Arg(0)};
+ if (arg.IsImmediate()) {
+ break;
+ }
+ it = arg.InstRecursive();
+ }
+ return *it;
+}
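+// AliasInst follows Identity and bitcast chains back to the instruction that actually
+// produces the value, so reference counting in Unref and register definitions operate on
+// the canonical instruction rather than on its aliases.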
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.h b/src/shader_recompiler/backend/glasm/reg_alloc.h
new file mode 100644
index 000000000..82aec66c6
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/reg_alloc.h
@@ -0,0 +1,303 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <bitset>
+
+#include <fmt/format.h>
+
+#include "common/bit_cast.h"
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+
+namespace Shader::IR {
+class Inst;
+class Value;
+} // namespace Shader::IR
+
+namespace Shader::Backend::GLASM {
+
+class EmitContext;
+
+enum class Type : u32 {
+ Void,
+ Register,
+ U32,
+ U64,
+};
+
+struct Id {
+ union {
+ u32 raw;
+ BitField<0, 1, u32> is_valid;
+ BitField<1, 1, u32> is_long;
+ BitField<2, 1, u32> is_spill;
+ BitField<3, 1, u32> is_condition_code;
+ BitField<4, 1, u32> is_null;
+ BitField<5, 27, u32> index;
+ };
+
+ bool operator==(Id rhs) const noexcept {
+ return raw == rhs.raw;
+ }
+ bool operator!=(Id rhs) const noexcept {
+ return !operator==(rhs);
+ }
+};
+static_assert(sizeof(Id) == sizeof(u32));
+
+struct Value {
+ Type type;
+ union {
+ Id id;
+ u32 imm_u32;
+ u64 imm_u64;
+ };
+
+ bool operator==(const Value& rhs) const noexcept {
+ if (type != rhs.type) {
+ return false;
+ }
+ switch (type) {
+ case Type::Void:
+ return true;
+ case Type::Register:
+ return id == rhs.id;
+ case Type::U32:
+ return imm_u32 == rhs.imm_u32;
+ case Type::U64:
+ return imm_u64 == rhs.imm_u64;
+ }
+ return false;
+ }
+ bool operator!=(const Value& rhs) const noexcept {
+ return !operator==(rhs);
+ }
+};
+struct Register : Value {};
+struct ScalarRegister : Value {};
+struct ScalarU32 : Value {};
+struct ScalarS32 : Value {};
+struct ScalarF32 : Value {};
+struct ScalarF64 : Value {};
+
+class RegAlloc {
+public:
+ RegAlloc() = default;
+
+ Register Define(IR::Inst& inst);
+
+ Register LongDefine(IR::Inst& inst);
+
+ [[nodiscard]] Value Peek(const IR::Value& value);
+
+ Value Consume(const IR::Value& value);
+
+ void Unref(IR::Inst& inst);
+
+ [[nodiscard]] Register AllocReg();
+
+ [[nodiscard]] Register AllocLongReg();
+
+ void FreeReg(Register reg);
+
+ void InvalidateConditionCodes() {
+ // This does nothing for now
+ }
+
+ [[nodiscard]] size_t NumUsedRegisters() const noexcept {
+ return num_used_registers;
+ }
+
+ [[nodiscard]] size_t NumUsedLongRegisters() const noexcept {
+ return num_used_long_registers;
+ }
+
+ [[nodiscard]] bool IsEmpty() const noexcept {
+ return register_use.none() && long_register_use.none();
+ }
+
+ /// Returns true if the instruction is expected to be aliased to another
+ static bool IsAliased(const IR::Inst& inst);
+
+    /// Returns the instruction at the end of an alias sequence
+ static IR::Inst& AliasInst(IR::Inst& inst);
+
+private:
+ static constexpr size_t NUM_REGS = 4096;
+ static constexpr size_t NUM_ELEMENTS = 4;
+
+ Value MakeImm(const IR::Value& value);
+
+ Register Define(IR::Inst& inst, bool is_long);
+
+ Value PeekInst(IR::Inst& inst);
+
+ Value ConsumeInst(IR::Inst& inst);
+
+ Id Alloc(bool is_long);
+
+ void Free(Id id);
+
+ size_t num_used_registers{};
+ size_t num_used_long_registers{};
+ std::bitset<NUM_REGS> register_use{};
+ std::bitset<NUM_REGS> long_register_use{};
+};
+
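+// Formats an id as its GLASM register name: R<n> for 32-bit registers, D<n> for
+// long registers and RC/DC for null ids; the scalar variant appends the .x component.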
+template <bool scalar, typename FormatContext>
+auto FormatTo(FormatContext& ctx, Id id) {
+ if (id.is_condition_code != 0) {
+ throw NotImplementedException("Condition code emission");
+ }
+ if (id.is_spill != 0) {
+ throw NotImplementedException("Spill emission");
+ }
+ if constexpr (scalar) {
+ if (id.is_null != 0) {
+ return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC.x" : "RC.x");
+ }
+ if (id.is_long != 0) {
+ return fmt::format_to(ctx.out(), "D{}.x", id.index.Value());
+ } else {
+ return fmt::format_to(ctx.out(), "R{}.x", id.index.Value());
+ }
+ } else {
+ if (id.is_null != 0) {
+ return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC" : "RC");
+ }
+ if (id.is_long != 0) {
+ return fmt::format_to(ctx.out(), "D{}", id.index.Value());
+ } else {
+ return fmt::format_to(ctx.out(), "R{}", id.index.Value());
+ }
+ }
+}
+
+} // namespace Shader::Backend::GLASM
+
+template <>
+struct fmt::formatter<Shader::Backend::GLASM::Id> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(Shader::Backend::GLASM::Id id, FormatContext& ctx) {
+ return Shader::Backend::GLASM::FormatTo<true>(ctx, id);
+ }
+};
+
+template <>
+struct fmt::formatter<Shader::Backend::GLASM::Register> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::Backend::GLASM::Register& value, FormatContext& ctx) {
+ if (value.type != Shader::Backend::GLASM::Type::Register) {
+ throw Shader::InvalidArgument("Register value type is not register");
+ }
+ return Shader::Backend::GLASM::FormatTo<false>(ctx, value.id);
+ }
+};
+
+template <>
+struct fmt::formatter<Shader::Backend::GLASM::ScalarRegister> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::Backend::GLASM::ScalarRegister& value, FormatContext& ctx) {
+ if (value.type != Shader::Backend::GLASM::Type::Register) {
+ throw Shader::InvalidArgument("Register value type is not register");
+ }
+ return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id);
+ }
+};
+
+template <>
+struct fmt::formatter<Shader::Backend::GLASM::ScalarU32> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::Backend::GLASM::ScalarU32& value, FormatContext& ctx) {
+ switch (value.type) {
+ case Shader::Backend::GLASM::Type::Void:
+ break;
+ case Shader::Backend::GLASM::Type::Register:
+ return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id);
+ case Shader::Backend::GLASM::Type::U32:
+ return fmt::format_to(ctx.out(), "{}", value.imm_u32);
+ case Shader::Backend::GLASM::Type::U64:
+ break;
+ }
+ throw Shader::InvalidArgument("Invalid value type {}", value.type);
+ }
+};
+
+template <>
+struct fmt::formatter<Shader::Backend::GLASM::ScalarS32> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::Backend::GLASM::ScalarS32& value, FormatContext& ctx) {
+ switch (value.type) {
+ case Shader::Backend::GLASM::Type::Void:
+ break;
+ case Shader::Backend::GLASM::Type::Register:
+ return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id);
+ case Shader::Backend::GLASM::Type::U32:
+ return fmt::format_to(ctx.out(), "{}", static_cast<s32>(value.imm_u32));
+ case Shader::Backend::GLASM::Type::U64:
+ break;
+ }
+ throw Shader::InvalidArgument("Invalid value type {}", value.type);
+ }
+};
+
+template <>
+struct fmt::formatter<Shader::Backend::GLASM::ScalarF32> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::Backend::GLASM::ScalarF32& value, FormatContext& ctx) {
+ switch (value.type) {
+ case Shader::Backend::GLASM::Type::Void:
+ break;
+ case Shader::Backend::GLASM::Type::Register:
+ return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id);
+ case Shader::Backend::GLASM::Type::U32:
+ return fmt::format_to(ctx.out(), "{}", Common::BitCast<f32>(value.imm_u32));
+ case Shader::Backend::GLASM::Type::U64:
+ break;
+ }
+ throw Shader::InvalidArgument("Invalid value type {}", value.type);
+ }
+};
+
+template <>
+struct fmt::formatter<Shader::Backend::GLASM::ScalarF64> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::Backend::GLASM::ScalarF64& value, FormatContext& ctx) {
+ switch (value.type) {
+ case Shader::Backend::GLASM::Type::Void:
+ break;
+ case Shader::Backend::GLASM::Type::Register:
+ return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id);
+ case Shader::Backend::GLASM::Type::U32:
+ break;
+ case Shader::Backend::GLASM::Type::U64:
+ return fmt::format_to(ctx.out(), "{}", Common::BitCast<f64>(value.imm_u64));
+ }
+ throw Shader::InvalidArgument("Invalid value type {}", value.type);
+ }
+};
diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp
new file mode 100644
index 000000000..4e6f2c0fe
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_context.cpp
@@ -0,0 +1,715 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/bindings.h"
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/profile.h"
+#include "shader_recompiler/runtime_info.h"
+
+namespace Shader::Backend::GLSL {
+namespace {
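+// Constant buffers are declared as vec4 arrays, so a byte offset selects the
+// 32-bit word (offset / 4) whose low two bits pick the xyzw component.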
+u32 CbufIndex(size_t offset) {
+ return (offset / 4) % 4;
+}
+
+char Swizzle(size_t offset) {
+ return "xyzw"[CbufIndex(offset)];
+}
+
+std::string_view InterpDecorator(Interpolation interp) {
+ switch (interp) {
+ case Interpolation::Smooth:
+ return "";
+ case Interpolation::Flat:
+ return "flat ";
+ case Interpolation::NoPerspective:
+ return "noperspective ";
+ }
+ throw InvalidArgument("Invalid interpolation {}", interp);
+}
+
+std::string_view InputArrayDecorator(Stage stage) {
+ switch (stage) {
+ case Stage::Geometry:
+ case Stage::TessellationControl:
+ case Stage::TessellationEval:
+ return "[]";
+ default:
+ return "";
+ }
+}
+
+bool StoresPerVertexAttributes(Stage stage) {
+ switch (stage) {
+ case Stage::VertexA:
+ case Stage::VertexB:
+ case Stage::Geometry:
+ case Stage::TessellationEval:
+ return true;
+ default:
+ return false;
+ }
+}
+
+std::string OutputDecorator(Stage stage, u32 size) {
+ switch (stage) {
+ case Stage::TessellationControl:
+ return fmt::format("[{}]", size);
+ default:
+ return "";
+ }
+}
+
+std::string_view SamplerType(TextureType type, bool is_depth) {
+ if (is_depth) {
+ switch (type) {
+ case TextureType::Color1D:
+ return "sampler1DShadow";
+ case TextureType::ColorArray1D:
+ return "sampler1DArrayShadow";
+ case TextureType::Color2D:
+ return "sampler2DShadow";
+ case TextureType::ColorArray2D:
+ return "sampler2DArrayShadow";
+ case TextureType::ColorCube:
+ return "samplerCubeShadow";
+ case TextureType::ColorArrayCube:
+ return "samplerCubeArrayShadow";
+ default:
+ throw NotImplementedException("Texture type: {}", type);
+ }
+ }
+ switch (type) {
+ case TextureType::Color1D:
+ return "sampler1D";
+ case TextureType::ColorArray1D:
+ return "sampler1DArray";
+ case TextureType::Color2D:
+ return "sampler2D";
+ case TextureType::ColorArray2D:
+ return "sampler2DArray";
+ case TextureType::Color3D:
+ return "sampler3D";
+ case TextureType::ColorCube:
+ return "samplerCube";
+ case TextureType::ColorArrayCube:
+ return "samplerCubeArray";
+ case TextureType::Buffer:
+ return "samplerBuffer";
+ default:
+ throw NotImplementedException("Texture type: {}", type);
+ }
+}
+
+std::string_view ImageType(TextureType type) {
+ switch (type) {
+ case TextureType::Color1D:
+ return "uimage1D";
+ case TextureType::ColorArray1D:
+ return "uimage1DArray";
+ case TextureType::Color2D:
+ return "uimage2D";
+ case TextureType::ColorArray2D:
+ return "uimage2DArray";
+ case TextureType::Color3D:
+ return "uimage3D";
+ case TextureType::ColorCube:
+ return "uimageCube";
+ case TextureType::ColorArrayCube:
+ return "uimageCubeArray";
+ case TextureType::Buffer:
+ return "uimageBuffer";
+ default:
+ throw NotImplementedException("Image type: {}", type);
+ }
+}
+
+std::string_view ImageFormatString(ImageFormat format) {
+ switch (format) {
+ case ImageFormat::Typeless:
+ return "";
+ case ImageFormat::R8_UINT:
+ return ",r8ui";
+ case ImageFormat::R8_SINT:
+ return ",r8i";
+ case ImageFormat::R16_UINT:
+ return ",r16ui";
+ case ImageFormat::R16_SINT:
+ return ",r16i";
+ case ImageFormat::R32_UINT:
+ return ",r32ui";
+ case ImageFormat::R32G32_UINT:
+ return ",rg32ui";
+ case ImageFormat::R32G32B32A32_UINT:
+ return ",rgba32ui";
+ default:
+ throw NotImplementedException("Image format: {}", format);
+ }
+}
+
+std::string_view ImageAccessQualifier(bool is_written, bool is_read) {
+ if (is_written && !is_read) {
+ return "writeonly ";
+ }
+ if (is_read && !is_written) {
+ return "readonly ";
+ }
+ return "";
+}
+
+std::string_view GetTessMode(TessPrimitive primitive) {
+ switch (primitive) {
+ case TessPrimitive::Triangles:
+ return "triangles";
+ case TessPrimitive::Quads:
+ return "quads";
+ case TessPrimitive::Isolines:
+ return "isolines";
+ }
+ throw InvalidArgument("Invalid tessellation primitive {}", primitive);
+}
+
+std::string_view GetTessSpacing(TessSpacing spacing) {
+ switch (spacing) {
+ case TessSpacing::Equal:
+ return "equal_spacing";
+ case TessSpacing::FractionalOdd:
+ return "fractional_odd_spacing";
+ case TessSpacing::FractionalEven:
+ return "fractional_even_spacing";
+ }
+ throw InvalidArgument("Invalid tessellation spacing {}", spacing);
+}
+
+std::string_view InputPrimitive(InputTopology topology) {
+ switch (topology) {
+ case InputTopology::Points:
+ return "points";
+ case InputTopology::Lines:
+ return "lines";
+ case InputTopology::LinesAdjacency:
+ return "lines_adjacency";
+ case InputTopology::Triangles:
+ return "triangles";
+ case InputTopology::TrianglesAdjacency:
+ return "triangles_adjacency";
+ }
+ throw InvalidArgument("Invalid input topology {}", topology);
+}
+
+std::string_view OutputPrimitive(OutputTopology topology) {
+ switch (topology) {
+ case OutputTopology::PointList:
+ return "points";
+ case OutputTopology::LineStrip:
+ return "line_strip";
+ case OutputTopology::TriangleStrip:
+ return "triangle_strip";
+ }
+ throw InvalidArgument("Invalid output topology {}", topology);
+}
+
+void SetupLegacyOutPerVertex(EmitContext& ctx, std::string& header) {
+ if (!ctx.info.stores.Legacy()) {
+ return;
+ }
+ if (ctx.info.stores.FixedFunctionTexture()) {
+ header += "vec4 gl_TexCoord[8];";
+ }
+ if (ctx.info.stores.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) {
+ header += "vec4 gl_FrontColor;";
+ }
+ if (ctx.info.stores.AnyComponent(IR::Attribute::ColorFrontSpecularR)) {
+ header += "vec4 gl_FrontSecondaryColor;";
+ }
+ if (ctx.info.stores.AnyComponent(IR::Attribute::ColorBackDiffuseR)) {
+ header += "vec4 gl_BackColor;";
+ }
+ if (ctx.info.stores.AnyComponent(IR::Attribute::ColorBackSpecularR)) {
+ header += "vec4 gl_BackSecondaryColor;";
+ }
+}
+
+void SetupOutPerVertex(EmitContext& ctx, std::string& header) {
+ if (!StoresPerVertexAttributes(ctx.stage)) {
+ return;
+ }
+ if (ctx.uses_geometry_passthrough) {
+ return;
+ }
+ header += "out gl_PerVertex{vec4 gl_Position;";
+ if (ctx.info.stores[IR::Attribute::PointSize]) {
+ header += "float gl_PointSize;";
+ }
+ if (ctx.info.stores.ClipDistances()) {
+ header += "float gl_ClipDistance[];";
+ }
+ if (ctx.info.stores[IR::Attribute::ViewportIndex] &&
+ ctx.profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) {
+ header += "int gl_ViewportIndex;";
+ }
+ SetupLegacyOutPerVertex(ctx, header);
+ header += "};";
+ if (ctx.info.stores[IR::Attribute::ViewportIndex] && ctx.stage == Stage::Geometry) {
+ header += "out int gl_ViewportIndex;";
+ }
+}
+
+void SetupInPerVertex(EmitContext& ctx, std::string& header) {
+ // Currently only required for TessellationControl to adhere to
+ // ARB_separate_shader_objects requirements
+ if (ctx.stage != Stage::TessellationControl) {
+ return;
+ }
+ const bool loads_position{ctx.info.loads.AnyComponent(IR::Attribute::PositionX)};
+ const bool loads_point_size{ctx.info.loads[IR::Attribute::PointSize]};
+ const bool loads_clip_distance{ctx.info.loads.ClipDistances()};
+ const bool loads_per_vertex{loads_position || loads_point_size || loads_clip_distance};
+ if (!loads_per_vertex) {
+ return;
+ }
+ header += "in gl_PerVertex{";
+ if (loads_position) {
+ header += "vec4 gl_Position;";
+ }
+ if (loads_point_size) {
+ header += "float gl_PointSize;";
+ }
+ if (loads_clip_distance) {
+ header += "float gl_ClipDistance[];";
+ }
+ header += "}gl_in[gl_MaxPatchVertices];";
+}
+
+void SetupLegacyInPerFragment(EmitContext& ctx, std::string& header) {
+ if (!ctx.info.loads.Legacy()) {
+ return;
+ }
+ header += "in gl_PerFragment{";
+ if (ctx.info.loads.FixedFunctionTexture()) {
+ header += "vec4 gl_TexCoord[8];";
+ }
+ if (ctx.info.loads.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) {
+ header += "vec4 gl_Color;";
+ }
+ header += "};";
+}
+
+} // Anonymous namespace
+
+EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,
+ const RuntimeInfo& runtime_info_)
+ : info{program.info}, profile{profile_}, runtime_info{runtime_info_}, stage{program.stage},
+ uses_geometry_passthrough{program.is_geometry_passthrough &&
+ profile.support_geometry_shader_passthrough} {
+ if (profile.need_fastmath_off) {
+ header += "#pragma optionNV(fastmath off)\n";
+ }
+ SetupExtensions();
+ switch (program.stage) {
+ case Stage::VertexA:
+ case Stage::VertexB:
+ stage_name = "vs";
+ break;
+ case Stage::TessellationControl:
+ stage_name = "tcs";
+ header += fmt::format("layout(vertices={})out;", program.invocations);
+ break;
+ case Stage::TessellationEval:
+ stage_name = "tes";
+ header += fmt::format("layout({},{},{})in;", GetTessMode(runtime_info.tess_primitive),
+ GetTessSpacing(runtime_info.tess_spacing),
+ runtime_info.tess_clockwise ? "cw" : "ccw");
+ break;
+ case Stage::Geometry:
+ stage_name = "gs";
+ header += fmt::format("layout({})in;", InputPrimitive(runtime_info.input_topology));
+ if (uses_geometry_passthrough) {
+ header += "layout(passthrough)in gl_PerVertex{vec4 gl_Position;};";
+ break;
+ } else if (program.is_geometry_passthrough &&
+ !profile.support_geometry_shader_passthrough) {
+ LOG_WARNING(Shader_GLSL, "Passthrough geometry program used but not supported");
+ }
+ header += fmt::format(
+ "layout({},max_vertices={})out;in gl_PerVertex{{vec4 gl_Position;}}gl_in[];",
+ OutputPrimitive(program.output_topology), program.output_vertices);
+ break;
+ case Stage::Fragment:
+ stage_name = "fs";
+ position_name = "gl_FragCoord";
+ if (runtime_info.force_early_z) {
+ header += "layout(early_fragment_tests)in;";
+ }
+ if (info.uses_sample_id) {
+ header += "in int gl_SampleID;";
+ }
+ if (info.stores_sample_mask) {
+ header += "out int gl_SampleMask[];";
+ }
+ break;
+ case Stage::Compute:
+ stage_name = "cs";
+ const u32 local_x{std::max(program.workgroup_size[0], 1u)};
+ const u32 local_y{std::max(program.workgroup_size[1], 1u)};
+ const u32 local_z{std::max(program.workgroup_size[2], 1u)};
+ header += fmt::format("layout(local_size_x={},local_size_y={},local_size_z={}) in;",
+ local_x, local_y, local_z);
+ break;
+ }
+ SetupOutPerVertex(*this, header);
+ SetupInPerVertex(*this, header);
+ SetupLegacyInPerFragment(*this, header);
+
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ if (!info.loads.Generic(index) || !runtime_info.previous_stage_stores.Generic(index)) {
+ continue;
+ }
+ const auto qualifier{uses_geometry_passthrough ? "passthrough"
+ : fmt::format("location={}", index)};
+ header += fmt::format("layout({}){}in vec4 in_attr{}{};", qualifier,
+ InterpDecorator(info.interpolation[index]), index,
+ InputArrayDecorator(stage));
+ }
+ for (size_t index = 0; index < info.uses_patches.size(); ++index) {
+ if (!info.uses_patches[index]) {
+ continue;
+ }
+ const auto qualifier{stage == Stage::TessellationControl ? "out" : "in"};
+ header += fmt::format("layout(location={})patch {} vec4 patch{};", index, qualifier, index);
+ }
+ if (stage == Stage::Fragment) {
+ for (size_t index = 0; index < info.stores_frag_color.size(); ++index) {
+ if (!info.stores_frag_color[index] && !profile.need_declared_frag_colors) {
+ continue;
+ }
+ header += fmt::format("layout(location={})out vec4 frag_color{};", index, index);
+ }
+ }
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ if (info.stores.Generic(index)) {
+ DefineGenericOutput(index, program.invocations);
+ }
+ }
+ DefineConstantBuffers(bindings);
+ DefineStorageBuffers(bindings);
+ SetupImages(bindings);
+ SetupTextures(bindings);
+ DefineHelperFunctions();
+ DefineConstants();
+}
+
+void EmitContext::SetupExtensions() {
+ header += "#extension GL_ARB_separate_shader_objects : enable\n";
+ if (info.uses_shadow_lod && profile.support_gl_texture_shadow_lod) {
+ header += "#extension GL_EXT_texture_shadow_lod : enable\n";
+ }
+ if (info.uses_int64 && profile.support_int64) {
+ header += "#extension GL_ARB_gpu_shader_int64 : enable\n";
+ }
+ if (info.uses_int64_bit_atomics) {
+ header += "#extension GL_NV_shader_atomic_int64 : enable\n";
+ }
+ if (info.uses_atomic_f32_add) {
+ header += "#extension GL_NV_shader_atomic_float : enable\n";
+ }
+ if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) {
+ header += "#extension GL_NV_shader_atomic_fp16_vector : enable\n";
+ }
+ if (info.uses_fp16) {
+ if (profile.support_gl_nv_gpu_shader_5) {
+ header += "#extension GL_NV_gpu_shader5 : enable\n";
+ }
+ if (profile.support_gl_amd_gpu_shader_half_float) {
+ header += "#extension GL_AMD_gpu_shader_half_float : enable\n";
+ }
+ }
+ if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote ||
+ info.uses_subgroup_shuffles || info.uses_fswzadd) {
+ header += "#extension GL_ARB_shader_ballot : enable\n"
+ "#extension GL_ARB_shader_group_vote : enable\n";
+ if (!info.uses_int64 && profile.support_int64) {
+ header += "#extension GL_ARB_gpu_shader_int64 : enable\n";
+ }
+ if (profile.support_gl_warp_intrinsics) {
+ header += "#extension GL_NV_shader_thread_shuffle : enable\n";
+ }
+ }
+ if ((info.stores[IR::Attribute::ViewportIndex] || info.stores[IR::Attribute::Layer]) &&
+ profile.support_viewport_index_layer_non_geometry && stage != Stage::Geometry) {
+ header += "#extension GL_ARB_shader_viewport_layer_array : enable\n";
+ }
+ if (info.uses_sparse_residency && profile.support_gl_sparse_textures) {
+ header += "#extension GL_ARB_sparse_texture2 : enable\n";
+ }
+ if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) {
+ header += "#extension GL_NV_viewport_array2 : enable\n";
+ }
+ if (info.uses_typeless_image_reads) {
+ header += "#extension GL_EXT_shader_image_load_formatted : enable\n";
+ }
+ if (info.uses_derivatives && profile.support_gl_derivative_control) {
+ header += "#extension GL_ARB_derivative_control : enable\n";
+ }
+ if (uses_geometry_passthrough) {
+ header += "#extension GL_NV_geometry_shader_passthrough : enable\n";
+ }
+}
+
+void EmitContext::DefineConstantBuffers(Bindings& bindings) {
+ if (info.constant_buffer_descriptors.empty()) {
+ return;
+ }
+ for (const auto& desc : info.constant_buffer_descriptors) {
+ header += fmt::format(
+ "layout(std140,binding={}) uniform {}_cbuf_{}{{vec4 {}_cbuf{}[{}];}};",
+ bindings.uniform_buffer, stage_name, desc.index, stage_name, desc.index, 4 * 1024);
+ bindings.uniform_buffer += desc.count;
+ }
+}
+
+void EmitContext::DefineStorageBuffers(Bindings& bindings) {
+ if (info.storage_buffers_descriptors.empty()) {
+ return;
+ }
+ u32 index{};
+ for (const auto& desc : info.storage_buffers_descriptors) {
+ header += fmt::format("layout(std430,binding={}) buffer {}_ssbo_{}{{uint {}_ssbo{}[];}};",
+ bindings.storage_buffer, stage_name, bindings.storage_buffer,
+ stage_name, index);
+ bindings.storage_buffer += desc.count;
+ index += desc.count;
+ }
+}
+
+void EmitContext::DefineGenericOutput(size_t index, u32 invocations) {
+ static constexpr std::string_view swizzle{"xyzw"};
+ const size_t base_index{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4};
+ u32 element{0};
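+    // Split the vec4 generic into sub-vectors so that each transform feedback varying
+    // gets its own output declaration with a matching component count.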
+ while (element < 4) {
+ std::string definition{fmt::format("layout(location={}", index)};
+ const u32 remainder{4 - element};
+ const TransformFeedbackVarying* xfb_varying{};
+ if (!runtime_info.xfb_varyings.empty()) {
+ xfb_varying = &runtime_info.xfb_varyings[base_index + element];
+ xfb_varying = xfb_varying && xfb_varying->components > 0 ? xfb_varying : nullptr;
+ }
+ const u32 num_components{xfb_varying ? xfb_varying->components : remainder};
+ if (element > 0) {
+ definition += fmt::format(",component={}", element);
+ }
+ if (xfb_varying) {
+ definition +=
+ fmt::format(",xfb_buffer={},xfb_stride={},xfb_offset={}", xfb_varying->buffer,
+ xfb_varying->stride, xfb_varying->offset);
+ }
+ std::string name{fmt::format("out_attr{}", index)};
+ if (num_components < 4 || element > 0) {
+ name += fmt::format("_{}", swizzle.substr(element, num_components));
+ }
+ const auto type{num_components == 1 ? "float" : fmt::format("vec{}", num_components)};
+ definition += fmt::format(")out {} {}{};", type, name, OutputDecorator(stage, invocations));
+ header += definition;
+
+ const GenericElementInfo element_info{
+ .name = name,
+ .first_element = element,
+ .num_components = num_components,
+ };
+ std::fill_n(output_generics[index].begin() + element, num_components, element_info);
+ element += num_components;
+ }
+}
+
+void EmitContext::DefineHelperFunctions() {
+ header += "\n#define ftoi floatBitsToInt\n#define ftou floatBitsToUint\n"
+ "#define itof intBitsToFloat\n#define utof uintBitsToFloat\n";
+ if (info.uses_global_increment || info.uses_shared_increment) {
+ header += "uint CasIncrement(uint op_a,uint op_b){return op_a>=op_b?0u:(op_a+1u);}";
+ }
+ if (info.uses_global_decrement || info.uses_shared_decrement) {
+ header += "uint CasDecrement(uint op_a,uint op_b){"
+ "return op_a==0||op_a>op_b?op_b:(op_a-1u);}";
+ }
+ if (info.uses_atomic_f32_add) {
+ header += "uint CasFloatAdd(uint op_a,float op_b){"
+ "return ftou(utof(op_a)+op_b);}";
+ }
+ if (info.uses_atomic_f32x2_add) {
+ header += "uint CasFloatAdd32x2(uint op_a,vec2 op_b){"
+ "return packHalf2x16(unpackHalf2x16(op_a)+op_b);}";
+ }
+ if (info.uses_atomic_f32x2_min) {
+ header += "uint CasFloatMin32x2(uint op_a,vec2 op_b){return "
+ "packHalf2x16(min(unpackHalf2x16(op_a),op_b));}";
+ }
+ if (info.uses_atomic_f32x2_max) {
+ header += "uint CasFloatMax32x2(uint op_a,vec2 op_b){return "
+ "packHalf2x16(max(unpackHalf2x16(op_a),op_b));}";
+ }
+ if (info.uses_atomic_f16x2_add) {
+ header += "uint CasFloatAdd16x2(uint op_a,f16vec2 op_b){return "
+ "packFloat2x16(unpackFloat2x16(op_a)+op_b);}";
+ }
+ if (info.uses_atomic_f16x2_min) {
+ header += "uint CasFloatMin16x2(uint op_a,f16vec2 op_b){return "
+ "packFloat2x16(min(unpackFloat2x16(op_a),op_b));}";
+ }
+ if (info.uses_atomic_f16x2_max) {
+ header += "uint CasFloatMax16x2(uint op_a,f16vec2 op_b){return "
+ "packFloat2x16(max(unpackFloat2x16(op_a),op_b));}";
+ }
+ if (info.uses_atomic_s32_min) {
+ header += "uint CasMinS32(uint op_a,uint op_b){return uint(min(int(op_a),int(op_b)));}";
+ }
+ if (info.uses_atomic_s32_max) {
+ header += "uint CasMaxS32(uint op_a,uint op_b){return uint(max(int(op_a),int(op_b)));}";
+ }
+ if (info.uses_global_memory && profile.support_int64) {
+ header += DefineGlobalMemoryFunctions();
+ }
+ if (info.loads_indexed_attributes) {
+ const bool is_array{stage == Stage::Geometry};
+ const auto vertex_arg{is_array ? ",uint vertex" : ""};
+ std::string func{
+ fmt::format("float IndexedAttrLoad(int offset{}){{int base_index=offset>>2;uint "
+ "masked_index=uint(base_index)&3u;switch(base_index>>2){{",
+ vertex_arg)};
+ if (info.loads.AnyComponent(IR::Attribute::PositionX)) {
+ const auto position_idx{is_array ? "gl_in[vertex]." : ""};
+ func += fmt::format("case {}:return {}{}[masked_index];",
+ static_cast<u32>(IR::Attribute::PositionX) >> 2, position_idx,
+ position_name);
+ }
+ const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2;
+ for (u32 index = 0; index < IR::NUM_GENERICS; ++index) {
+ if (!info.loads.Generic(index)) {
+ continue;
+ }
+ const auto vertex_idx{is_array ? "[vertex]" : ""};
+ func += fmt::format("case {}:return in_attr{}{}[masked_index];",
+ base_attribute_value + index, index, vertex_idx);
+ }
+ func += "default: return 0.0;}}";
+ header += func;
+ }
+ if (info.stores_indexed_attributes) {
+ // TODO
+ }
+}
+
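+// Builds load/store helpers that locate a global address inside one of the NVN
+// storage buffers: each buffer's base address and size are read from the constant
+// buffer and compared against the address before indexing the matching SSBO.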
+std::string EmitContext::DefineGlobalMemoryFunctions() {
+ const auto define_body{[&](std::string& func, size_t index, std::string_view return_statement) {
+ const auto& ssbo{info.storage_buffers_descriptors[index]};
+ const u32 size_cbuf_offset{ssbo.cbuf_offset + 8};
+ const auto ssbo_addr{fmt::format("ssbo_addr{}", index)};
+ const auto cbuf{fmt::format("{}_cbuf{}", stage_name, ssbo.cbuf_index)};
+ std::array<std::string, 2> addr_xy;
+ std::array<std::string, 2> size_xy;
+ for (size_t i = 0; i < addr_xy.size(); ++i) {
+ const auto addr_loc{ssbo.cbuf_offset + 4 * i};
+ const auto size_loc{size_cbuf_offset + 4 * i};
+ addr_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, addr_loc / 16, Swizzle(addr_loc));
+ size_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, size_loc / 16, Swizzle(size_loc));
+ }
+ const auto addr_pack{fmt::format("packUint2x32(uvec2({},{}))", addr_xy[0], addr_xy[1])};
+        const auto addr_statement{fmt::format("uint64_t {}={};", ssbo_addr, addr_pack)};
+        func += addr_statement;
+
+ const auto size_vec{fmt::format("uvec2({},{})", size_xy[0], size_xy[1])};
+ const auto comp_lhs{fmt::format("(addr>={})", ssbo_addr)};
+ const auto comp_rhs{fmt::format("(addr<({}+uint64_t({})))", ssbo_addr, size_vec)};
+ const auto comparison{fmt::format("if({}&&{}){{", comp_lhs, comp_rhs)};
+ func += comparison;
+
+ const auto ssbo_name{fmt::format("{}_ssbo{}", stage_name, index)};
+ func += fmt::format(fmt::runtime(return_statement), ssbo_name, ssbo_addr);
+ }};
+ std::string write_func{"void WriteGlobal32(uint64_t addr,uint data){"};
+ std::string write_func_64{"void WriteGlobal64(uint64_t addr,uvec2 data){"};
+ std::string write_func_128{"void WriteGlobal128(uint64_t addr,uvec4 data){"};
+ std::string load_func{"uint LoadGlobal32(uint64_t addr){"};
+ std::string load_func_64{"uvec2 LoadGlobal64(uint64_t addr){"};
+ std::string load_func_128{"uvec4 LoadGlobal128(uint64_t addr){"};
+ const size_t num_buffers{info.storage_buffers_descriptors.size()};
+ for (size_t index = 0; index < num_buffers; ++index) {
+ if (!info.nvn_buffer_used[index]) {
+ continue;
+ }
+ define_body(write_func, index, "{0}[uint(addr-{1})>>2]=data;return;}}");
+ define_body(write_func_64, index,
+ "{0}[uint(addr-{1})>>2]=data.x;{0}[uint(addr-{1}+4)>>2]=data.y;return;}}");
+ define_body(write_func_128, index,
+ "{0}[uint(addr-{1})>>2]=data.x;{0}[uint(addr-{1}+4)>>2]=data.y;{0}[uint("
+ "addr-{1}+8)>>2]=data.z;{0}[uint(addr-{1}+12)>>2]=data.w;return;}}");
+ define_body(load_func, index, "return {0}[uint(addr-{1})>>2];}}");
+ define_body(load_func_64, index,
+ "return uvec2({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2]);}}");
+ define_body(load_func_128, index,
+ "return uvec4({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2],{0}["
+ "uint(addr-{1}+8)>>2],{0}[uint(addr-{1}+12)>>2]);}}");
+ }
+ write_func += '}';
+ write_func_64 += '}';
+ write_func_128 += '}';
+ load_func += "return 0u;}";
+ load_func_64 += "return uvec2(0);}";
+ load_func_128 += "return uvec4(0);}";
+ return write_func + write_func_64 + write_func_128 + load_func + load_func_64 + load_func_128;
+}
+
+void EmitContext::SetupImages(Bindings& bindings) {
+ image_buffers.reserve(info.image_buffer_descriptors.size());
+ for (const auto& desc : info.image_buffer_descriptors) {
+ image_buffers.push_back({bindings.image, desc.count});
+ const auto format{ImageFormatString(desc.format)};
+ const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)};
+ const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
+ header += fmt::format("layout(binding={}{}) uniform {}uimageBuffer img{}{};",
+ bindings.image, format, qualifier, bindings.image, array_decorator);
+ bindings.image += desc.count;
+ }
+ images.reserve(info.image_descriptors.size());
+ for (const auto& desc : info.image_descriptors) {
+ images.push_back({bindings.image, desc.count});
+ const auto format{ImageFormatString(desc.format)};
+ const auto image_type{ImageType(desc.type)};
+ const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)};
+ const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
+ header += fmt::format("layout(binding={}{})uniform {}{} img{}{};", bindings.image, format,
+ qualifier, image_type, bindings.image, array_decorator);
+ bindings.image += desc.count;
+ }
+}
+
+void EmitContext::SetupTextures(Bindings& bindings) {
+ texture_buffers.reserve(info.texture_buffer_descriptors.size());
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ texture_buffers.push_back({bindings.texture, desc.count});
+ const auto sampler_type{SamplerType(TextureType::Buffer, false)};
+ const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
+ header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture,
+ sampler_type, bindings.texture, array_decorator);
+ bindings.texture += desc.count;
+ }
+ textures.reserve(info.texture_descriptors.size());
+ for (const auto& desc : info.texture_descriptors) {
+ textures.push_back({bindings.texture, desc.count});
+ const auto sampler_type{SamplerType(desc.type, desc.is_depth)};
+ const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
+ header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture,
+ sampler_type, bindings.texture, array_decorator);
+ bindings.texture += desc.count;
+ }
+}
+
+void EmitContext::DefineConstants() {
+ if (info.uses_fswzadd) {
+ header += "const float FSWZ_A[]=float[4](-1.f,1.f,-1.f,0.f);"
+ "const float FSWZ_B[]=float[4](-1.f,-1.f,1.f,-1.f);";
+ }
+}
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h
new file mode 100644
index 000000000..d9b639d29
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_context.h
@@ -0,0 +1,174 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <fmt/format.h>
+
+#include "shader_recompiler/backend/glsl/var_alloc.h"
+#include "shader_recompiler/stage.h"
+
+namespace Shader {
+struct Info;
+struct Profile;
+struct RuntimeInfo;
+} // namespace Shader
+
+namespace Shader::Backend {
+struct Bindings;
+}
+
+namespace Shader::IR {
+class Inst;
+struct Program;
+} // namespace Shader::IR
+
+namespace Shader::Backend::GLSL {
+
+struct GenericElementInfo {
+ std::string name;
+ u32 first_element{};
+ u32 num_components{};
+};
+
+struct TextureImageDefinition {
+ u32 binding;
+ u32 count;
+};
+
+class EmitContext {
+public:
+ explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,
+ const RuntimeInfo& runtime_info_);
+
+ template <GlslVarType type, typename... Args>
+ void Add(const char* format_str, IR::Inst& inst, Args&&... args) {
+ const auto var_def{var_alloc.AddDefine(inst, type)};
+ if (var_def.empty()) {
+            // Skip the assignment by dropping the leading "{}=" from the format string.
+ code += fmt::format(fmt::runtime(format_str + 3), std::forward<Args>(args)...);
+ } else {
+ code += fmt::format(fmt::runtime(format_str), var_def, std::forward<Args>(args)...);
+ }
+ // TODO: Remove this
+ code += '\n';
+ }
+
+ template <typename... Args>
+ void AddU1(const char* format_str, IR::Inst& inst, Args&&... args) {
+ Add<GlslVarType::U1>(format_str, inst, args...);
+ }
+
+ template <typename... Args>
+ void AddF16x2(const char* format_str, IR::Inst& inst, Args&&... args) {
+ Add<GlslVarType::F16x2>(format_str, inst, args...);
+ }
+
+ template <typename... Args>
+ void AddU32(const char* format_str, IR::Inst& inst, Args&&... args) {
+ Add<GlslVarType::U32>(format_str, inst, args...);
+ }
+
+ template <typename... Args>
+ void AddF32(const char* format_str, IR::Inst& inst, Args&&... args) {
+ Add<GlslVarType::F32>(format_str, inst, args...);
+ }
+
+ template <typename... Args>
+ void AddU64(const char* format_str, IR::Inst& inst, Args&&... args) {
+ Add<GlslVarType::U64>(format_str, inst, args...);
+ }
+
+ template <typename... Args>
+ void AddF64(const char* format_str, IR::Inst& inst, Args&&... args) {
+ Add<GlslVarType::F64>(format_str, inst, args...);
+ }
+
+ template <typename... Args>
+ void AddU32x2(const char* format_str, IR::Inst& inst, Args&&... args) {
+ Add<GlslVarType::U32x2>(format_str, inst, args...);
+ }
+
+ template <typename... Args>
+ void AddF32x2(const char* format_str, IR::Inst& inst, Args&&... args) {
+ Add<GlslVarType::F32x2>(format_str, inst, args...);
+ }
+
+ template <typename... Args>
+ void AddU32x3(const char* format_str, IR::Inst& inst, Args&&... args) {
+ Add<GlslVarType::U32x3>(format_str, inst, args...);
+ }
+
+ template <typename... Args>
+ void AddF32x3(const char* format_str, IR::Inst& inst, Args&&... args) {
+ Add<GlslVarType::F32x3>(format_str, inst, args...);
+ }
+
+ template <typename... Args>
+ void AddU32x4(const char* format_str, IR::Inst& inst, Args&&... args) {
+ Add<GlslVarType::U32x4>(format_str, inst, args...);
+ }
+
+ template <typename... Args>
+ void AddF32x4(const char* format_str, IR::Inst& inst, Args&&... args) {
+ Add<GlslVarType::F32x4>(format_str, inst, args...);
+ }
+
+ template <typename... Args>
+ void AddPrecF32(const char* format_str, IR::Inst& inst, Args&&... args) {
+ Add<GlslVarType::PrecF32>(format_str, inst, args...);
+ }
+
+ template <typename... Args>
+ void AddPrecF64(const char* format_str, IR::Inst& inst, Args&&... args) {
+ Add<GlslVarType::PrecF64>(format_str, inst, args...);
+ }
+
+ template <typename... Args>
+ void Add(const char* format_str, Args&&... args) {
+ code += fmt::format(fmt::runtime(format_str), std::forward<Args>(args)...);
+ // TODO: Remove this
+ code += '\n';
+ }
+
+ std::string header;
+ std::string code;
+ VarAlloc var_alloc;
+ const Info& info;
+ const Profile& profile;
+ const RuntimeInfo& runtime_info;
+
+ Stage stage{};
+ std::string_view stage_name = "invalid";
+ std::string_view position_name = "gl_Position";
+
+ std::vector<TextureImageDefinition> texture_buffers;
+ std::vector<TextureImageDefinition> image_buffers;
+ std::vector<TextureImageDefinition> textures;
+ std::vector<TextureImageDefinition> images;
+ std::array<std::array<GenericElementInfo, 4>, 32> output_generics{};
+
+ u32 num_safety_loop_vars{};
+
+ bool uses_y_direction{};
+ bool uses_cc_carry{};
+ bool uses_geometry_passthrough{};
+
+private:
+ void SetupExtensions();
+ void DefineConstantBuffers(Bindings& bindings);
+ void DefineStorageBuffers(Bindings& bindings);
+ void DefineGenericOutput(size_t index, u32 invocations);
+ void DefineHelperFunctions();
+ void DefineConstants();
+ std::string DefineGlobalMemoryFunctions();
+ void SetupImages(Bindings& bindings);
+ void SetupTextures(Bindings& bindings);
+};
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp
new file mode 100644
index 000000000..8a430d573
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp
@@ -0,0 +1,252 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <string>
+#include <tuple>
+#include <type_traits>
+
+#include "common/div_ceil.h"
+#include "common/settings.h"
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+
+namespace Shader::Backend::GLSL {
+namespace {
+template <class Func>
+struct FuncTraits {};
+
+template <class ReturnType_, class... Args>
+struct FuncTraits<ReturnType_ (*)(Args...)> {
+ using ReturnType = ReturnType_;
+
+ static constexpr size_t NUM_ARGS = sizeof...(Args);
+
+ template <size_t I>
+ using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
+};
+
+template <auto func, typename... Args>
+void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) {
+ inst->SetDefinition<Id>(func(ctx, std::forward<Args>(args)...));
+}
+
+template <typename ArgType>
+auto Arg(EmitContext& ctx, const IR::Value& arg) {
+ if constexpr (std::is_same_v<ArgType, std::string_view>) {
+ return ctx.var_alloc.Consume(arg);
+ } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) {
+ return arg;
+ } else if constexpr (std::is_same_v<ArgType, u32>) {
+ return arg.U32();
+ } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) {
+ return arg.Attribute();
+ } else if constexpr (std::is_same_v<ArgType, IR::Patch>) {
+ return arg.Patch();
+ } else if constexpr (std::is_same_v<ArgType, IR::Reg>) {
+ return arg.Reg();
+ }
+}
+
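+// Expands the IR instruction arguments into the emit function's parameter types,
+// optionally passing the instruction itself when the callee takes IR::Inst&.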
+template <auto func, bool is_first_arg_inst, size_t... I>
+void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) {
+ using Traits = FuncTraits<decltype(func)>;
+ if constexpr (std::is_same_v<typename Traits::ReturnType, Id>) {
+ if constexpr (is_first_arg_inst) {
+ SetDefinition<func>(
+ ctx, inst, *inst,
+ Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
+ } else {
+ SetDefinition<func>(
+ ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...);
+ }
+ } else {
+ if constexpr (is_first_arg_inst) {
+ func(ctx, *inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
+ } else {
+ func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...);
+ }
+ }
+}
+
+template <auto func>
+void Invoke(EmitContext& ctx, IR::Inst* inst) {
+ using Traits = FuncTraits<decltype(func)>;
+ static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments");
+ if constexpr (Traits::NUM_ARGS == 1) {
+ Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{});
+ } else {
+ using FirstArgType = typename Traits::template ArgType<1>;
+ static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst&>;
+ using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>;
+ Invoke<func, is_first_arg_inst>(ctx, inst, Indices{});
+ }
+}
+
+void EmitInst(EmitContext& ctx, IR::Inst* inst) {
+ switch (inst->GetOpcode()) {
+#define OPCODE(name, result_type, ...) \
+ case IR::Opcode::name: \
+ return Invoke<&Emit##name>(ctx, inst);
+#include "shader_recompiler/frontend/ir/opcodes.inc"
+#undef OPCODE
+ }
+ throw LogicError("Invalid opcode {}", inst->GetOpcode());
+}
+
+bool IsReference(IR::Inst& inst) {
+ return inst.GetOpcode() == IR::Opcode::Reference;
+}
+
+void PrecolorInst(IR::Inst& phi) {
+    // Insert phi moves before references to avoid overwriting other phis
+ const size_t num_args{phi.NumArgs()};
+ for (size_t i = 0; i < num_args; ++i) {
+ IR::Block& phi_block{*phi.PhiBlock(i)};
+ auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()};
+ IR::IREmitter ir{phi_block, it};
+ const IR::Value arg{phi.Arg(i)};
+ if (arg.IsImmediate()) {
+ ir.PhiMove(phi, arg);
+ } else {
+ ir.PhiMove(phi, IR::Value{arg.InstRecursive()});
+ }
+ }
+ for (size_t i = 0; i < num_args; ++i) {
+ IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi});
+ }
+}
+
+void Precolor(const IR::Program& program) {
+ for (IR::Block* const block : program.blocks) {
+ for (IR::Inst& phi : block->Instructions()) {
+ if (!IR::IsPhi(phi)) {
+ break;
+ }
+ PrecolorInst(phi);
+ }
+ }
+}
+
+void EmitCode(EmitContext& ctx, const IR::Program& program) {
+ for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
+ switch (node.type) {
+ case IR::AbstractSyntaxNode::Type::Block:
+ for (IR::Inst& inst : node.data.block->Instructions()) {
+ EmitInst(ctx, &inst);
+ }
+ break;
+ case IR::AbstractSyntaxNode::Type::If:
+ ctx.Add("if({}){{", ctx.var_alloc.Consume(node.data.if_node.cond));
+ break;
+ case IR::AbstractSyntaxNode::Type::EndIf:
+ ctx.Add("}}");
+ break;
+ case IR::AbstractSyntaxNode::Type::Break:
+ if (node.data.break_node.cond.IsImmediate()) {
+ if (node.data.break_node.cond.U1()) {
+ ctx.Add("break;");
+ }
+ } else {
+ ctx.Add("if({}){{break;}}", ctx.var_alloc.Consume(node.data.break_node.cond));
+ }
+ break;
+ case IR::AbstractSyntaxNode::Type::Return:
+ case IR::AbstractSyntaxNode::Type::Unreachable:
+ ctx.Add("return;");
+ break;
+ case IR::AbstractSyntaxNode::Type::Loop:
+ ctx.Add("for(;;){{");
+ break;
+ case IR::AbstractSyntaxNode::Type::Repeat:
+ if (Settings::values.disable_shader_loop_safety_checks) {
+ ctx.Add("if(!{}){{break;}}}}", ctx.var_alloc.Consume(node.data.repeat.cond));
+ } else {
+ ctx.Add("if(--loop{}<0 || !{}){{break;}}}}", ctx.num_safety_loop_vars++,
+ ctx.var_alloc.Consume(node.data.repeat.cond));
+ }
+ break;
+ default:
+ throw NotImplementedException("AbstractSyntaxNode Type {}", node.type);
+ }
+ }
+}
+
+std::string GlslVersionSpecifier(const EmitContext& ctx) {
+ if (ctx.uses_y_direction || ctx.info.stores.Legacy() || ctx.info.loads.Legacy()) {
+ return " compatibility";
+ }
+ return "";
+}
+
+bool IsPreciseType(GlslVarType type) {
+ switch (type) {
+ case GlslVarType::PrecF32:
+ case GlslVarType::PrecF64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+void DefineVariables(const EmitContext& ctx, std::string& header) {
+ for (u32 i = 0; i < static_cast<u32>(GlslVarType::Void); ++i) {
+ const auto type{static_cast<GlslVarType>(i)};
+ const auto& tracker{ctx.var_alloc.GetUseTracker(type)};
+ const auto type_name{ctx.var_alloc.GetGlslType(type)};
+ const bool has_precise_bug{ctx.stage == Stage::Fragment && ctx.profile.has_gl_precise_bug};
+ const auto precise{!has_precise_bug && IsPreciseType(type) ? "precise " : ""};
+ // Temps/return types that are never used are stored at index 0
+ if (tracker.uses_temp) {
+ header += fmt::format("{}{} t{}={}(0);", precise, type_name,
+ ctx.var_alloc.Representation(0, type), type_name);
+ }
+ for (u32 index = 0; index < tracker.num_used; ++index) {
+ header += fmt::format("{}{} {}={}(0);", precise, type_name,
+ ctx.var_alloc.Representation(index, type), type_name);
+ }
+ }
+ for (u32 i = 0; i < ctx.num_safety_loop_vars; ++i) {
+ header += fmt::format("int loop{}=0x2000;", i);
+ }
+}
+} // Anonymous namespace
+
+std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program,
+ Bindings& bindings) {
+ EmitContext ctx{program, bindings, profile, runtime_info};
+ Precolor(program);
+ EmitCode(ctx, program);
+ const std::string version{fmt::format("#version 450{}\n", GlslVersionSpecifier(ctx))};
+ ctx.header.insert(0, version);
+ if (program.shared_memory_size > 0) {
+ const auto requested_size{program.shared_memory_size};
+ const auto max_size{profile.gl_max_compute_smem_size};
+ const bool needs_clamp{requested_size > max_size};
+ if (needs_clamp) {
+ LOG_WARNING(Shader_GLSL, "Requested shared memory size ({}) exceeds device limit ({})",
+ requested_size, max_size);
+ }
+ const auto smem_size{needs_clamp ? max_size : requested_size};
+ ctx.header += fmt::format("shared uint smem[{}];", Common::DivCeil(smem_size, 4U));
+ }
+ ctx.header += "void main(){\n";
+ if (program.local_memory_size > 0) {
+ ctx.header += fmt::format("uint lmem[{}];", Common::DivCeil(program.local_memory_size, 4U));
+ }
+ DefineVariables(ctx, ctx.header);
+ if (ctx.uses_cc_carry) {
+ ctx.header += "uint carry;";
+ }
+ if (program.info.uses_subgroup_shuffles) {
+ ctx.header += "bool shfl_in_bounds;";
+ }
+ ctx.code.insert(0, ctx.header);
+ ctx.code += '}';
+ return ctx.code;
+}
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.h b/src/shader_recompiler/backend/glsl/emit_glsl.h
new file mode 100644
index 000000000..20e5719e6
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl.h
@@ -0,0 +1,24 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+
+#include "shader_recompiler/backend/bindings.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/profile.h"
+#include "shader_recompiler/runtime_info.h"
+
+namespace Shader::Backend::GLSL {
+
+[[nodiscard]] std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info,
+ IR::Program& program, Bindings& bindings);
+
+[[nodiscard]] inline std::string EmitGLSL(const Profile& profile, IR::Program& program) {
+ Bindings binding;
+ return EmitGLSL(profile, {}, program, binding);
+}
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp
new file mode 100644
index 000000000..772acc5a4
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp
@@ -0,0 +1,418 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLSL {
+namespace {
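+// Emulates read-modify-write atomics that lack native GLSL support by looping on
+// atomicCompSwap until the previously observed value is still the stored one.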
+constexpr char cas_loop[]{
+ "for (;;){{uint old={};{}=atomicCompSwap({},old,{}({},{}));if({}==old){{break;}}}}"};
+
+void SharedCasFunction(EmitContext& ctx, IR::Inst& inst, std::string_view offset,
+ std::string_view value, std::string_view function) {
+ const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
+ const std::string smem{fmt::format("smem[{}>>2]", offset)};
+ ctx.Add(cas_loop, smem, ret, smem, function, smem, value, ret);
+}
+
+void SsboCasFunction(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value, std::string_view function) {
+ const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
+ const std::string ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(),
+ ctx.var_alloc.Consume(offset))};
+ ctx.Add(cas_loop, ssbo, ret, ssbo, function, ssbo, value, ret);
+}
+
+void SsboCasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value,
+ std::string_view function) {
+ const std::string ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(),
+ ctx.var_alloc.Consume(offset))};
+ const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
+ ctx.Add(cas_loop, ssbo, ret, ssbo, function, ssbo, value, ret);
+ ctx.AddF32("{}=utof({});", inst, ret);
+}
+} // Anonymous namespace
+
+void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value) {
+ ctx.AddU32("{}=atomicAdd(smem[{}>>2],{});", inst, pointer_offset, value);
+}
+
+void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value) {
+ const std::string u32_value{fmt::format("uint({})", value)};
+ SharedCasFunction(ctx, inst, pointer_offset, u32_value, "CasMinS32");
+}
+
+void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value) {
+ ctx.AddU32("{}=atomicMin(smem[{}>>2],{});", inst, pointer_offset, value);
+}
+
+void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value) {
+ const std::string u32_value{fmt::format("uint({})", value)};
+ SharedCasFunction(ctx, inst, pointer_offset, u32_value, "CasMaxS32");
+}
+
+void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value) {
+ ctx.AddU32("{}=atomicMax(smem[{}>>2],{});", inst, pointer_offset, value);
+}
+
+void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value) {
+ SharedCasFunction(ctx, inst, pointer_offset, value, "CasIncrement");
+}
+
+void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value) {
+ SharedCasFunction(ctx, inst, pointer_offset, value, "CasDecrement");
+}
+
+void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value) {
+ ctx.AddU32("{}=atomicAnd(smem[{}>>2],{});", inst, pointer_offset, value);
+}
+
+void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value) {
+ ctx.AddU32("{}=atomicOr(smem[{}>>2],{});", inst, pointer_offset, value);
+}
+
+void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value) {
+ ctx.AddU32("{}=atomicXor(smem[{}>>2],{});", inst, pointer_offset, value);
+}
+
+void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value) {
+ ctx.AddU32("{}=atomicExchange(smem[{}>>2],{});", inst, pointer_offset, value);
+}
+
+void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value) {
+ LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic");
+ ctx.AddU64("{}=packUint2x32(uvec2(smem[{}>>2],smem[({}+4)>>2]));", inst, pointer_offset,
+ pointer_offset);
+ ctx.Add("smem[{}>>2]=unpackUint2x32({}).x;smem[({}+4)>>2]=unpackUint2x32({}).y;",
+ pointer_offset, value, pointer_offset, value);
+}
+
+void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ ctx.AddU32("{}=atomicAdd({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
+ ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ const std::string u32_value{fmt::format("uint({})", value)};
+ SsboCasFunction(ctx, inst, binding, offset, u32_value, "CasMinS32");
+}
+
+void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ ctx.AddU32("{}=atomicMin({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
+ ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ const std::string u32_value{fmt::format("uint({})", value)};
+ SsboCasFunction(ctx, inst, binding, offset, u32_value, "CasMaxS32");
+}
+
+void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ ctx.AddU32("{}=atomicMax({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
+ ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ SsboCasFunction(ctx, inst, binding, offset, value, "CasIncrement");
+}
+
+void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ SsboCasFunction(ctx, inst, binding, offset, value, "CasDecrement");
+}
+
+void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ ctx.AddU32("{}=atomicAnd({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
+ ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ ctx.AddU32("{}=atomicOr({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
+ ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ ctx.AddU32("{}=atomicXor({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
+ ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ ctx.AddU32("{}=atomicExchange({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
+ ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic");
+ ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
+ ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+ binding.U32(), ctx.var_alloc.Consume(offset));
+ ctx.Add("{}_ssbo{}[{}>>2]+=unpackUint2x32({}).x;{}_ssbo{}[({}>>2)+1]+=unpackUint2x32({}).y;",
+ ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name,
+ binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic");
+ ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
+ ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+ binding.U32(), ctx.var_alloc.Consume(offset));
+ ctx.Add("for(int i=0;i<2;++i){{ "
+ "{}_ssbo{}[({}>>2)+i]=uint(min(int({}_ssbo{}[({}>>2)+i]),unpackInt2x32(int64_t({}))[i])"
+ ");}}",
+ ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+ binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, falling back to non-atomic");
+ ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
+ ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+ binding.U32(), ctx.var_alloc.Consume(offset));
+ ctx.Add("for(int i=0;i<2;++i){{ "
+ "{}_ssbo{}[({}>>2)+i]=min({}_ssbo{}[({}>>2)+i],unpackUint2x32(uint64_t({}))[i]);}}",
+ ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+ binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, falling back to non-atomic");
+ ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
+ ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+ binding.U32(), ctx.var_alloc.Consume(offset));
+ ctx.Add("for(int i=0;i<2;++i){{ "
+ "{}_ssbo{}[({}>>2)+i]=uint(max(int({}_ssbo{}[({}>>2)+i]),unpackInt2x32(int64_t({}))[i])"
+ ");}}",
+ ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+ binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, falling back to non-atomic");
+ ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
+ ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+ binding.U32(), ctx.var_alloc.Consume(offset));
+ ctx.Add("for(int "
+ "i=0;i<2;++i){{{}_ssbo{}[({}>>2)+i]=max({}_ssbo{}[({}>>2)+i],unpackUint2x32(uint64_t({}"
+ "))[i]);}}",
+ ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+ binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ ctx.AddU64(
+ "{}=packUint2x32(uvec2(atomicAnd({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicAnd({}_"
+ "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));",
+ inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name,
+ binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ ctx.AddU64("{}=packUint2x32(uvec2(atomicOr({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicOr({}_"
+ "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));",
+ inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
+ ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ ctx.AddU64(
+ "{}=packUint2x32(uvec2(atomicXor({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicXor({}_"
+ "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));",
+ inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name,
+ binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ ctx.AddU64("{}=packUint2x32(uvec2(atomicExchange({}_ssbo{}[{}>>2],unpackUint2x32({}).x),"
+ "atomicExchange({}_ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));",
+ inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
+ ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ SsboCasFunctionF32(ctx, inst, binding, offset, value, "CasFloatAdd");
+}
+
+void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatAdd16x2");
+}
+
+void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatAdd32x2");
+}
+
+void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMin16x2");
+}
+
+void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMin32x2");
+}
+
+void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMax16x2");
+}
+
+void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMax32x2");
+}
+
+void EmitGlobalAtomicIAdd32(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicSMin32(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicUMin32(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicSMax32(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicUMax32(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicInc32(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicDec32(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicAnd32(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicOr32(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicXor32(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicExchange32(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicIAdd64(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicSMin64(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicUMin64(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicSMax64(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicUMax64(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicInc64(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicDec64(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicAnd64(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicOr64(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicXor64(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicExchange64(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicAddF32(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicAddF16x2(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicAddF32x2(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicMinF16x2(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicMinF32x2(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicMaxF16x2(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicMaxF32x2(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp
new file mode 100644
index 000000000..e1d1b558e
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp
@@ -0,0 +1,21 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLSL {
+void EmitBarrier(EmitContext& ctx) {
+ ctx.Add("barrier();");
+}
+
+void EmitWorkgroupMemoryBarrier(EmitContext& ctx) {
+ ctx.Add("groupMemoryBarrier();");
+}
+
+void EmitDeviceMemoryBarrier(EmitContext& ctx) {
+ ctx.Add("memoryBarrier();");
+}
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp
new file mode 100644
index 000000000..3c1714e89
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp
@@ -0,0 +1,94 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLSL {
+namespace {
+void Alias(IR::Inst& inst, const IR::Value& value) {
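+ // Make the instruction reuse the aliased value's definition instead of emitting a copy,
+ // moving this instruction's uses onto the source value.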
+ if (value.IsImmediate()) {
+ return;
+ }
+ IR::Inst& value_inst{*value.InstRecursive()};
+ value_inst.DestructiveAddUsage(inst.UseCount());
+ value_inst.DestructiveRemoveUsage();
+ inst.SetDefinition(value_inst.Definition<Id>());
+}
+} // Anonymous namespace
+
+void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) {
+ Alias(inst, value);
+}
+
+void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) {
+ // Fake one usage to get a real variable out of the condition
+ inst.DestructiveAddUsage(1);
+ const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U1)};
+ const auto input{ctx.var_alloc.Consume(value)};
+ if (ret != input) {
+ ctx.Add("{}={};", ret, input);
+ }
+}
+
+void EmitBitCastU16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) {
+ NotImplemented();
+}
+
+void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32("{}=ftou({});", inst, value);
+}
+
+void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU64("{}=doubleBitsToUint64({});", inst, value);
+}
+
+void EmitBitCastF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) {
+ NotImplemented();
+}
+
+void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=utof({});", inst, value);
+}
+
+void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF64("{}=uint64BitsToDouble({});", inst, value);
+}
+
+void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU64("{}=packUint2x32({});", inst, value);
+}
+
+void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32x2("{}=unpackUint2x32({});", inst, value);
+}
+
+void EmitPackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32("{}=packFloat2x16({});", inst, value);
+}
+
+void EmitUnpackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF16x2("{}=unpackFloat2x16({});", inst, value);
+}
+
+void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32("{}=packHalf2x16({});", inst, value);
+}
+
+void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32x2("{}=unpackHalf2x16({});", inst, value);
+}
+
+void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF64("{}=packDouble2x32({});", inst, value);
+}
+
+void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32x2("{}=unpackDouble2x32({});", inst, value);
+}
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp
new file mode 100644
index 000000000..49a66e3ec
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp
@@ -0,0 +1,219 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLSL {
+namespace {
+constexpr std::string_view SWIZZLE{"xyzw"};
+void CompositeInsert(EmitContext& ctx, std::string_view result, std::string_view composite,
+ std::string_view object, u32 index) {
+ if (result == composite) {
+ // The result is aliased with the composite
+ ctx.Add("{}.{}={};", composite, SWIZZLE[index], object);
+ } else {
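+ // The result is a distinct variable: copy the composite first, then overwrite the
+ // selected component.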
+ ctx.Add("{}={};{}.{}={};", result, composite, result, SWIZZLE[index], object);
+ }
+}
+} // Anonymous namespace
+
+void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
+ std::string_view e2) {
+ ctx.AddU32x2("{}=uvec2({},{});", inst, e1, e2);
+}
+
+void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
+ std::string_view e2, std::string_view e3) {
+ ctx.AddU32x3("{}=uvec3({},{},{});", inst, e1, e2, e3);
+}
+
+void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
+ std::string_view e2, std::string_view e3, std::string_view e4) {
+ ctx.AddU32x4("{}=uvec4({},{},{},{});", inst, e1, e2, e3, e4);
+}
+
+void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ u32 index) {
+ ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]);
+}
+
+void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ u32 index) {
+ ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]);
+}
+
+void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ u32 index) {
+ ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]);
+}
+
+void EmitCompositeInsertU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ std::string_view object, u32 index) {
+ const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)};
+ CompositeInsert(ctx, ret, composite, object, index);
+}
+
+void EmitCompositeInsertU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ std::string_view object, u32 index) {
+ const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x3)};
+ CompositeInsert(ctx, ret, composite, object, index);
+}
+
+void EmitCompositeInsertU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ std::string_view object, u32 index) {
+ const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x4)};
+ CompositeInsert(ctx, ret, composite, object, index);
+}
+
+void EmitCompositeConstructF16x2([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] std::string_view e1,
+ [[maybe_unused]] std::string_view e2) {
+ NotImplemented();
+}
+
+void EmitCompositeConstructF16x3([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] std::string_view e1,
+ [[maybe_unused]] std::string_view e2,
+ [[maybe_unused]] std::string_view e3) {
+ NotImplemented();
+}
+
+void EmitCompositeConstructF16x4([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] std::string_view e1,
+ [[maybe_unused]] std::string_view e2,
+ [[maybe_unused]] std::string_view e3,
+ [[maybe_unused]] std::string_view e4) {
+ NotImplemented();
+}
+
+void EmitCompositeExtractF16x2([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] std::string_view composite,
+ [[maybe_unused]] u32 index) {
+ NotImplemented();
+}
+
+void EmitCompositeExtractF16x3([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] std::string_view composite,
+ [[maybe_unused]] u32 index) {
+ NotImplemented();
+}
+
+void EmitCompositeExtractF16x4([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] std::string_view composite,
+ [[maybe_unused]] u32 index) {
+ NotImplemented();
+}
+
+void EmitCompositeInsertF16x2([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] std::string_view composite,
+ [[maybe_unused]] std::string_view object,
+ [[maybe_unused]] u32 index) {
+ NotImplemented();
+}
+
+void EmitCompositeInsertF16x3([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] std::string_view composite,
+ [[maybe_unused]] std::string_view object,
+ [[maybe_unused]] u32 index) {
+ NotImplemented();
+}
+
+void EmitCompositeInsertF16x4([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] std::string_view composite,
+ [[maybe_unused]] std::string_view object,
+ [[maybe_unused]] u32 index) {
+ NotImplemented();
+}
+
+void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
+ std::string_view e2) {
+ ctx.AddF32x2("{}=vec2({},{});", inst, e1, e2);
+}
+
+void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
+ std::string_view e2, std::string_view e3) {
+ ctx.AddF32x3("{}=vec3({},{},{});", inst, e1, e2, e3);
+}
+
+void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
+ std::string_view e2, std::string_view e3, std::string_view e4) {
+ ctx.AddF32x4("{}=vec4({},{},{},{});", inst, e1, e2, e3, e4);
+}
+
+void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ u32 index) {
+ ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]);
+}
+
+void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ u32 index) {
+ ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]);
+}
+
+void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ u32 index) {
+ ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]);
+}
+
+void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ std::string_view object, u32 index) {
+ const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32x2)};
+ CompositeInsert(ctx, ret, composite, object, index);
+}
+
+void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ std::string_view object, u32 index) {
+ const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32x3)};
+ CompositeInsert(ctx, ret, composite, object, index);
+}
+
+void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ std::string_view object, u32 index) {
+ const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
+ CompositeInsert(ctx, ret, composite, object, index);
+}
+
+void EmitCompositeConstructF64x2([[maybe_unused]] EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitCompositeConstructF64x3([[maybe_unused]] EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitCompositeConstructF64x4([[maybe_unused]] EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitCompositeExtractF64x2([[maybe_unused]] EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitCompositeExtractF64x3([[maybe_unused]] EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitCompositeExtractF64x4([[maybe_unused]] EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object,
+ u32 index) {
+ ctx.Add("{}.{}={};", composite, SWIZZLE[index], object);
+}
+
+void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object,
+ u32 index) {
+ ctx.Add("{}.{}={};", composite, SWIZZLE[index], object);
+}
+
+void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object,
+ u32 index) {
+ ctx.Add("{}.{}={};", composite, SWIZZLE[index], object);
+}
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
new file mode 100644
index 000000000..580063fa9
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
@@ -0,0 +1,456 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/profile.h"
+#include "shader_recompiler/runtime_info.h"
+
+namespace Shader::Backend::GLSL {
+namespace {
+constexpr char SWIZZLE[]{"xyzw"};
+
+u32 CbufIndex(u32 offset) {
+ return (offset / 4) % 4;
+}
+
+char OffsetSwizzle(u32 offset) {
+ return SWIZZLE[CbufIndex(offset)];
+}
+
+bool IsInputArray(Stage stage) {
+ return stage == Stage::Geometry || stage == Stage::TessellationControl ||
+ stage == Stage::TessellationEval;
+}
+
+std::string InputVertexIndex(EmitContext& ctx, std::string_view vertex) {
+ return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : "";
+}
+
+std::string_view OutputVertexIndex(EmitContext& ctx) {
+ return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : "";
+}
+
+void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding,
+ const IR::Value& offset, u32 num_bits, std::string_view cast = {},
+ std::string_view bit_offset = {}) {
+ const bool is_immediate{offset.IsImmediate()};
+ const bool component_indexing_bug{!is_immediate && ctx.profile.has_gl_component_indexing_bug};
+ if (is_immediate) {
+ const s32 signed_offset{static_cast<s32>(offset.U32())};
+ static constexpr u32 cbuf_size{0x10000};
+ if (signed_offset < 0 || offset.U32() > cbuf_size) {
+ LOG_WARNING(Shader_GLSL, "Immediate constant buffer offset is out of bounds");
+ ctx.Add("{}=0u;", ret);
+ return;
+ }
+ }
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ const auto index{is_immediate ? fmt::format("{}", offset.U32() / 16)
+ : fmt::format("{}>>4", offset_var)};
+ const auto swizzle{is_immediate ? fmt::format(".{}", OffsetSwizzle(offset.U32()))
+ : fmt::format("[({}>>2)%4]", offset_var)};
+
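+ // Constant buffers are arrays of vec4s: offset>>4 selects the vec4 and (offset>>2)%4
+ // selects the component within it.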
+ const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())};
+ const auto cbuf_cast{fmt::format("{}({}[{}]{{}})", cast, cbuf, index)};
+ const auto extraction{num_bits == 32 ? cbuf_cast
 : fmt::format("bitfieldExtract({},int({}),{})", cbuf_cast,
+ bit_offset, num_bits)};
+ if (!component_indexing_bug) {
+ const auto result{fmt::format(fmt::runtime(extraction), swizzle)};
+ ctx.Add("{}={};", ret, result);
+ return;
+ }
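+ // Workaround for drivers with broken dynamic component indexing: emit one guarded
+ // assignment per component and select the matching one at runtime.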
+ const auto cbuf_offset{fmt::format("{}>>2", offset_var)};
+ for (u32 i = 0; i < 4; ++i) {
+ const auto swizzle_string{fmt::format(".{}", "xyzw"[i])};
+ const auto result{fmt::format(fmt::runtime(extraction), swizzle_string)};
+ ctx.Add("if(({}&3)=={}){}={};", cbuf_offset, i, ret, result);
+ }
+}
+
+void GetCbuf8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset,
+ std::string_view cast) {
+ const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
+ if (offset.IsImmediate()) {
+ const auto bit_offset{fmt::format("{}", (offset.U32() % 4) * 8)};
+ GetCbuf(ctx, ret, binding, offset, 8, cast, bit_offset);
+ } else {
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ const auto bit_offset{fmt::format("({}%4)*8", offset_var)};
+ GetCbuf(ctx, ret, binding, offset, 8, cast, bit_offset);
+ }
+}
+
+void GetCbuf16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset,
+ std::string_view cast) {
+ const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
+ if (offset.IsImmediate()) {
+ const auto bit_offset{fmt::format("{}", ((offset.U32() / 2) % 2) * 16)};
+ GetCbuf(ctx, ret, binding, offset, 16, cast, bit_offset);
+ } else {
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ const auto bit_offset{fmt::format("(({}>>1)%2)*16", offset_var)};
+ GetCbuf(ctx, ret, binding, offset, 16, cast, bit_offset);
+ }
+}
+
+u32 TexCoordIndex(IR::Attribute attr) {
+ return (static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4;
+}
+} // Anonymous namespace
+
+void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset) {
+ GetCbuf8(ctx, inst, binding, offset, "ftou");
+}
+
+void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset) {
+ GetCbuf8(ctx, inst, binding, offset, "ftoi");
+}
+
+void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset) {
+ GetCbuf16(ctx, inst, binding, offset, "ftou");
+}
+
+void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset) {
+ GetCbuf16(ctx, inst, binding, offset, "ftoi");
+}
+
+void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset) {
+ const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
+ GetCbuf(ctx, ret, binding, offset, 32, "ftou");
+}
+
+void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset) {
+ const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32)};
+ GetCbuf(ctx, ret, binding, offset, 32);
+}
+
+void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset) {
+ const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())};
+ if (offset.IsImmediate()) {
+ static constexpr u32 cbuf_size{0x10000};
+ const u32 u32_offset{offset.U32()};
+ const s32 signed_offset{static_cast<s32>(offset.U32())};
+ if (signed_offset < 0 || u32_offset > cbuf_size) {
+ LOG_WARNING(Shader_GLSL, "Immediate constant buffer offset is out of bounds");
+ ctx.AddU32x2("{}=uvec2(0u);", inst);
+ return;
+ }
+ if (u32_offset % 2 == 0) {
+ ctx.AddU32x2("{}=ftou({}[{}].{}{});", inst, cbuf, u32_offset / 16,
+ OffsetSwizzle(u32_offset), OffsetSwizzle(u32_offset + 4));
+ } else {
+ ctx.AddU32x2("{}=uvec2(ftou({}[{}].{}),ftou({}[{}].{}));", inst, cbuf, u32_offset / 16,
+ OffsetSwizzle(u32_offset), cbuf, (u32_offset + 4) / 16,
+ OffsetSwizzle(u32_offset + 4));
+ }
+ return;
+ }
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ if (!ctx.profile.has_gl_component_indexing_bug) {
+ ctx.AddU32x2("{}=uvec2(ftou({}[{}>>4][({}>>2)%4]),ftou({}[({}+4)>>4][(({}+4)>>2)%4]));",
+ inst, cbuf, offset_var, offset_var, cbuf, offset_var, offset_var);
+ return;
+ }
+ const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)};
+ const auto cbuf_offset{fmt::format("{}>>2", offset_var)};
+ for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
+ ctx.Add("if(({}&3)=={}){}=uvec2(ftou({}[{}>>4].{}),ftou({}[({}+4)>>4].{}));", cbuf_offset,
+ swizzle, ret, cbuf, offset_var, "xyzw"[swizzle], cbuf, offset_var,
+ "xyzw"[(swizzle + 1) % 4]);
+ }
+}
+
+void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
+ std::string_view vertex) {
+ const u32 element{static_cast<u32>(attr) % 4};
+ const char swizzle{"xyzw"[element]};
+ if (IR::IsGeneric(attr)) {
+ const u32 index{IR::GenericAttributeIndex(attr)};
+ if (!ctx.runtime_info.previous_stage_stores.Generic(index, element)) {
+ if (element == 3) {
+ ctx.AddF32("{}=1.f;", inst, attr);
+ } else {
+ ctx.AddF32("{}=0.f;", inst, attr);
+ }
+ return;
+ }
+ ctx.AddF32("{}=in_attr{}{}.{};", inst, index, InputVertexIndex(ctx, vertex), swizzle);
+ return;
+ }
+ // GLSL only exposes 8 legacy texcoords
+ if (attr >= IR::Attribute::FixedFncTexture8S && attr <= IR::Attribute::FixedFncTexture9Q) {
+ LOG_WARNING(Shader_GLSL, "GLSL does not allow access to gl_TexCoord[{}]",
+ TexCoordIndex(attr));
+ ctx.AddF32("{}=0.f;", inst);
+ return;
+ }
+ if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture7Q) {
+ const u32 index{TexCoordIndex(attr)};
+ ctx.AddF32("{}=gl_TexCoord[{}].{};", inst, index, swizzle);
+ return;
+ }
+ switch (attr) {
+ case IR::Attribute::PrimitiveId:
+ ctx.AddF32("{}=itof(gl_PrimitiveID);", inst);
+ break;
+ case IR::Attribute::PositionX:
+ case IR::Attribute::PositionY:
+ case IR::Attribute::PositionZ:
+ case IR::Attribute::PositionW: {
+ const bool is_array{IsInputArray(ctx.stage)};
+ const auto input_decorator{is_array ? fmt::format("gl_in[{}].", vertex) : ""};
+ ctx.AddF32("{}={}{}.{};", inst, input_decorator, ctx.position_name, swizzle);
+ break;
+ }
+ case IR::Attribute::ColorFrontDiffuseR:
+ case IR::Attribute::ColorFrontDiffuseG:
+ case IR::Attribute::ColorFrontDiffuseB:
+ case IR::Attribute::ColorFrontDiffuseA:
+ if (ctx.stage == Stage::Fragment) {
+ ctx.AddF32("{}=gl_Color.{};", inst, swizzle);
+ } else {
+ ctx.AddF32("{}=gl_FrontColor.{};", inst, swizzle);
+ }
+ break;
+ case IR::Attribute::PointSpriteS:
+ case IR::Attribute::PointSpriteT:
+ ctx.AddF32("{}=gl_PointCoord.{};", inst, swizzle);
+ break;
+ case IR::Attribute::TessellationEvaluationPointU:
+ case IR::Attribute::TessellationEvaluationPointV:
+ ctx.AddF32("{}=gl_TessCoord.{};", inst, swizzle);
+ break;
+ case IR::Attribute::InstanceId:
+ ctx.AddF32("{}=itof(gl_InstanceID);", inst);
+ break;
+ case IR::Attribute::VertexId:
+ ctx.AddF32("{}=itof(gl_VertexID);", inst);
+ break;
+ case IR::Attribute::FrontFace:
+ ctx.AddF32("{}=itof(gl_FrontFacing?-1:0);", inst);
+ break;
+ default:
+ throw NotImplementedException("Get attribute {}", attr);
+ }
+}
+
+void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value,
+ [[maybe_unused]] std::string_view vertex) {
+ if (IR::IsGeneric(attr)) {
+ const u32 index{IR::GenericAttributeIndex(attr)};
+ const u32 attr_element{IR::GenericAttributeElement(attr)};
+ const GenericElementInfo& info{ctx.output_generics.at(index).at(attr_element)};
+ const auto output_decorator{OutputVertexIndex(ctx)};
+ if (info.num_components == 1) {
+ ctx.Add("{}{}={};", info.name, output_decorator, value);
+ } else {
+ const u32 index_element{attr_element - info.first_element};
+ ctx.Add("{}{}.{}={};", info.name, output_decorator, "xyzw"[index_element], value);
+ }
+ return;
+ }
+ const u32 element{static_cast<u32>(attr) % 4};
+ const char swizzle{"xyzw"[element]};
+ // GLSL only exposes 8 legacy texcoords
+ if (attr >= IR::Attribute::FixedFncTexture8S && attr <= IR::Attribute::FixedFncTexture9Q) {
+ LOG_WARNING(Shader_GLSL, "GLSL does not allow access to gl_TexCoord[{}]",
+ TexCoordIndex(attr));
+ return;
+ }
+ if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture7Q) {
+ const u32 index{TexCoordIndex(attr)};
+ ctx.Add("gl_TexCoord[{}].{}={};", index, swizzle, value);
+ return;
+ }
+ switch (attr) {
+ case IR::Attribute::Layer:
+ if (ctx.stage != Stage::Geometry &&
+ !ctx.profile.support_viewport_index_layer_non_geometry) {
+ LOG_WARNING(Shader_GLSL, "Shader stores viewport layer but device does not support "
+ "viewport layer extension");
+ break;
+ }
+ ctx.Add("gl_Layer=ftoi({});", value);
+ break;
+ case IR::Attribute::ViewportIndex:
+ if (ctx.stage != Stage::Geometry &&
+ !ctx.profile.support_viewport_index_layer_non_geometry) {
+ LOG_WARNING(Shader_GLSL, "Shader stores viewport index but device does not support "
+ "viewport layer extension");
+ break;
+ }
+ ctx.Add("gl_ViewportIndex=ftoi({});", value);
+ break;
+ case IR::Attribute::ViewportMask:
+ if (ctx.stage != Stage::Geometry && !ctx.profile.support_viewport_mask) {
+ LOG_WARNING(
+ Shader_GLSL,
+ "Shader stores viewport mask but device does not support viewport mask extension");
+ break;
+ }
+ ctx.Add("gl_ViewportMask[0]=ftoi({});", value);
+ break;
+ case IR::Attribute::PointSize:
+ ctx.Add("gl_PointSize={};", value);
+ break;
+ case IR::Attribute::PositionX:
+ case IR::Attribute::PositionY:
+ case IR::Attribute::PositionZ:
+ case IR::Attribute::PositionW:
+ ctx.Add("gl_Position.{}={};", swizzle, value);
+ break;
+ case IR::Attribute::ColorFrontDiffuseR:
+ case IR::Attribute::ColorFrontDiffuseG:
+ case IR::Attribute::ColorFrontDiffuseB:
+ case IR::Attribute::ColorFrontDiffuseA:
+ ctx.Add("gl_FrontColor.{}={};", swizzle, value);
+ break;
+ case IR::Attribute::ColorFrontSpecularR:
+ case IR::Attribute::ColorFrontSpecularG:
+ case IR::Attribute::ColorFrontSpecularB:
+ case IR::Attribute::ColorFrontSpecularA:
+ ctx.Add("gl_FrontSecondaryColor.{}={};", swizzle, value);
+ break;
+ case IR::Attribute::ColorBackDiffuseR:
+ case IR::Attribute::ColorBackDiffuseG:
+ case IR::Attribute::ColorBackDiffuseB:
+ case IR::Attribute::ColorBackDiffuseA:
+ ctx.Add("gl_BackColor.{}={};", swizzle, value);
+ break;
+ case IR::Attribute::ColorBackSpecularR:
+ case IR::Attribute::ColorBackSpecularG:
+ case IR::Attribute::ColorBackSpecularB:
+ case IR::Attribute::ColorBackSpecularA:
+ ctx.Add("gl_BackSecondaryColor.{}={};", swizzle, value);
+ break;
+ case IR::Attribute::FogCoordinate:
+ ctx.Add("gl_FogFragCoord={};", value);
+ break;
+ case IR::Attribute::ClipDistance0:
+ case IR::Attribute::ClipDistance1:
+ case IR::Attribute::ClipDistance2:
+ case IR::Attribute::ClipDistance3:
+ case IR::Attribute::ClipDistance4:
+ case IR::Attribute::ClipDistance5:
+ case IR::Attribute::ClipDistance6:
+ case IR::Attribute::ClipDistance7: {
+ const u32 index{static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::ClipDistance0)};
+ ctx.Add("gl_ClipDistance[{}]={};", index, value);
+ break;
+ }
+ default:
+ throw NotImplementedException("Set attribute {}", attr);
+ }
+}
+
+void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, std::string_view offset,
+ std::string_view vertex) {
+ const bool is_array{ctx.stage == Stage::Geometry};
+ const auto vertex_arg{is_array ? fmt::format(",{}", vertex) : ""};
+ ctx.AddF32("{}=IndexedAttrLoad(int({}){});", inst, offset, vertex_arg);
+}
+
+void EmitSetAttributeIndexed([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] std::string_view offset,
+ [[maybe_unused]] std::string_view value,
+ [[maybe_unused]] std::string_view vertex) {
+ NotImplemented();
+}
+
+void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch) {
+ if (!IR::IsGeneric(patch)) {
+ throw NotImplementedException("Non-generic patch load");
+ }
+ const u32 index{IR::GenericPatchIndex(patch)};
+ const u32 element{IR::GenericPatchElement(patch)};
+ const char swizzle{"xyzw"[element]};
+ ctx.AddF32("{}=patch{}.{};", inst, index, swizzle);
+}
+
+void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value) {
+ if (IR::IsGeneric(patch)) {
+ const u32 index{IR::GenericPatchIndex(patch)};
+ const u32 element{IR::GenericPatchElement(patch)};
+ ctx.Add("patch{}.{}={};", index, "xyzw"[element], value);
+ return;
+ }
+ switch (patch) {
+ case IR::Patch::TessellationLodLeft:
+ case IR::Patch::TessellationLodRight:
+ case IR::Patch::TessellationLodTop:
+ case IR::Patch::TessellationLodBottom: {
+ const u32 index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)};
+ ctx.Add("gl_TessLevelOuter[{}]={};", index, value);
+ break;
+ }
+ case IR::Patch::TessellationLodInteriorU:
+ ctx.Add("gl_TessLevelInner[0]={};", value);
+ break;
+ case IR::Patch::TessellationLodInteriorV:
+ ctx.Add("gl_TessLevelInner[1]={};", value);
+ break;
+ default:
+ throw NotImplementedException("Patch {}", patch);
+ }
+}
+
+void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value) {
+ const char swizzle{"xyzw"[component]};
+ ctx.Add("frag_color{}.{}={};", index, swizzle, value);
+}
+
+void EmitSetSampleMask(EmitContext& ctx, std::string_view value) {
+ ctx.Add("gl_SampleMask[0]=int({});", value);
+}
+
+void EmitSetFragDepth(EmitContext& ctx, std::string_view value) {
+ ctx.Add("gl_FragDepth={};", value);
+}
+
+void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst) {
+ ctx.AddU32x3("{}=gl_LocalInvocationID;", inst);
+}
+
+void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst) {
+ ctx.AddU32x3("{}=gl_WorkGroupID;", inst);
+}
+
+void EmitInvocationId(EmitContext& ctx, IR::Inst& inst) {
+ ctx.AddU32("{}=uint(gl_InvocationID);", inst);
+}
+
+void EmitSampleId(EmitContext& ctx, IR::Inst& inst) {
+ ctx.AddU32("{}=uint(gl_SampleID);", inst);
+}
+
+void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst) {
+ ctx.AddU1("{}=gl_HelperInvocation;", inst);
+}
+
+void EmitYDirection(EmitContext& ctx, IR::Inst& inst) {
+ ctx.uses_y_direction = true;
+ ctx.AddF32("{}=gl_FrontMaterial.ambient.a;", inst);
+}
+
+void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset) {
+ ctx.AddU32("{}=lmem[{}];", inst, word_offset);
+}
+
+void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value) {
+ ctx.Add("lmem[{}]={};", word_offset, value);
+}
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp
new file mode 100644
index 000000000..53f8896be
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp
@@ -0,0 +1,21 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/exception.h"
+
+namespace Shader::Backend::GLSL {
+
+void EmitJoin(EmitContext&) {
+ throw NotImplementedException("Join shouldn't be emitted");
+}
+
+void EmitDemoteToHelperInvocation(EmitContext& ctx) {
+ ctx.Add("discard;");
+}
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp
new file mode 100644
index 000000000..eeae6562c
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp
@@ -0,0 +1,230 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLSL {
+void EmitConvertS16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
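+ // Convert to a 32-bit integer, keep the low 16 bits and OR the sign bit into bit 15 to
+ // form the signed 16-bit result.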
+ ctx.AddU32("{}=(int({})&0xffff)|(bitfieldExtract(int({}),31,1)<<15);", inst, value, value);
+}
+
+void EmitConvertS16F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertS32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32("{}=int({});", inst, value);
+}
+
+void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32("{}=int({});", inst, value);
+}
+
+void EmitConvertS64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU64("{}=int64_t({});", inst, value);
+}
+
+void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU64("{}=int64_t({});", inst, value);
+}
+
+void EmitConvertU16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertU16F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertU16F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertU32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32("{}=uint({});", inst, value);
+}
+
+void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32("{}=uint({});", inst, value);
+}
+
+void EmitConvertU64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU64("{}=uint64_t({});", inst, value);
+}
+
+void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU64("{}=uint64_t({});", inst, value);
+}
+
+void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU64("{}=uint64_t({});", inst, value);
+}
+
+void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32("{}=uint({});", inst, value);
+}
+
+void EmitConvertF16F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=float({});", inst, value);
+}
+
+void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF64("{}=double({});", inst, value);
+}
+
+void EmitConvertF16S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF16S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF16S32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF16S64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF16U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF16U32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF16U64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF32S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF32S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=float(int({}));", inst, value);
+}
+
+void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=float(int64_t({}));", inst, value);
+}
+
+void EmitConvertF32U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=float({}&0xffff);", inst, value);
+}
+
+void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=float({});", inst, value);
+}
+
+void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=float({});", inst, value);
+}
+
+void EmitConvertF64S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF64S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF64("{}=double(int({}));", inst, value);
+}
+
+void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF64("{}=double(int64_t({}));", inst, value);
+}
+
+void EmitConvertF64U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF64U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF64("{}=double({});", inst, value);
+}
+
+void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF64("{}=double({});", inst, value);
+}
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp
new file mode 100644
index 000000000..d423bfb1b
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp
@@ -0,0 +1,456 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLSL {
+namespace {
+void Compare(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs,
+ std::string_view op, bool ordered) {
+ const auto nan_op{ordered ? "&&!" : "||"};
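+ // Ordered comparisons additionally require that neither operand is NaN, while unordered
+ // comparisons also hold when either operand is NaN.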
+ ctx.AddU1("{}={}{}{}"
+ "{}isnan({}){}isnan({});",
+ inst, lhs, op, rhs, nan_op, lhs, nan_op, rhs);
+}
+
+bool IsPrecise(const IR::Inst& inst) {
+ return inst.Flags<IR::FpControl>().no_contraction;
+}
+} // Anonymous namespace
+
+void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=abs({});", inst, value);
+}
+
+void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF64("{}=abs({});", inst, value);
+}
+
+void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) {
+ NotImplemented();
+}
+
+void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ if (IsPrecise(inst)) {
+ ctx.AddPrecF32("{}={}+{};", inst, a, b);
+ } else {
+ ctx.AddF32("{}={}+{};", inst, a, b);
+ }
+}
+
+void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ if (IsPrecise(inst)) {
+ ctx.AddPrecF64("{}={}+{};", inst, a, b);
+ } else {
+ ctx.AddF64("{}={}+{};", inst, a, b);
+ }
+}
+
+void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b,
+ [[maybe_unused]] std::string_view c) {
+ NotImplemented();
+}
+
+void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b,
+ std::string_view c) {
+ if (IsPrecise(inst)) {
+ ctx.AddPrecF32("{}=fma({},{},{});", inst, a, b, c);
+ } else {
+ ctx.AddF32("{}=fma({},{},{});", inst, a, b, c);
+ }
+}
+
+void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b,
+ std::string_view c) {
+ if (IsPrecise(inst)) {
+ ctx.AddPrecF64("{}=fma({},{},{});", inst, a, b, c);
+ } else {
+ ctx.AddF64("{}=fma({},{},{});", inst, a, b, c);
+ }
+}
+
+void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ ctx.AddF32("{}=max({},{});", inst, a, b);
+}
+
+void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ ctx.AddF64("{}=max({},{});", inst, a, b);
+}
+
+void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ ctx.AddF32("{}=min({},{});", inst, a, b);
+}
+
+void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ ctx.AddF64("{}=min({},{});", inst, a, b);
+}
+
+void EmitFPMul16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) {
+ NotImplemented();
+}
+
+void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ if (IsPrecise(inst)) {
+ ctx.AddPrecF32("{}={}*{};", inst, a, b);
+ } else {
+ ctx.AddF32("{}={}*{};", inst, a, b);
+ }
+}
+
+void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ if (IsPrecise(inst)) {
+ ctx.AddPrecF64("{}={}*{};", inst, a, b);
+ } else {
+ ctx.AddF64("{}={}*{};", inst, a, b);
+ }
+}
+
+void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=-({});", inst, value);
+}
+
+void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF64("{}=-({});", inst, value);
+}
+
+void EmitFPSin(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=sin({});", inst, value);
+}
+
+void EmitFPCos(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=cos({});", inst, value);
+}
+
+void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=exp2({});", inst, value);
+}
+
+void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=log2({});", inst, value);
+}
+
+void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=(1.0f)/{};", inst, value);
+}
+
+void EmitFPRecip64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF64("{}=1.0/{};", inst, value);
+}
+
+void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=inversesqrt({});", inst, value);
+}
+
+void EmitFPRecipSqrt64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=sqrt({});", inst, value);
+}
+
+void EmitFPSaturate16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=min(max({},0.0),1.0);", inst, value);
+}
+
+void EmitFPSaturate64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF64("{}=min(max({},0.0),1.0);", inst, value);
+}
+
+void EmitFPClamp16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value,
+ [[maybe_unused]] std::string_view min_value,
+ [[maybe_unused]] std::string_view max_value) {
+ NotImplemented();
+}
+
+void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value,
+ std::string_view min_value, std::string_view max_value) {
+ // GLSL's clamp does not produce desirable results
+ ctx.AddF32("{}=min(max({},float({})),float({}));", inst, value, min_value, max_value);
+}
+
+void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, std::string_view value,
+ std::string_view min_value, std::string_view max_value) {
+ // GLSL's clamp does not produce desirable results
+ ctx.AddF64("{}=min(max({},double({})),double({}));", inst, value, min_value, max_value);
+}
+
+void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=roundEven({});", inst, value);
+}
+
+void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF64("{}=roundEven({});", inst, value);
+}
+
+void EmitFPFloor16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=floor({});", inst, value);
+}
+
+void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF64("{}=floor({});", inst, value);
+}
+
+void EmitFPCeil16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=ceil({});", inst, value);
+}
+
+void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF64("{}=ceil({});", inst, value);
+}
+
+void EmitFPTrunc16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=trunc({});", inst, value);
+}
+
+void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF64("{}=trunc({});", inst, value);
+}
+
+void EmitFPOrdEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs,
+ [[maybe_unused]] std::string_view rhs) {
+ NotImplemented();
+}
+
+void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, "==", true);
+}
+
+void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, "==", true);
+}
+
+void EmitFPUnordEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs,
+ [[maybe_unused]] std::string_view rhs) {
+ NotImplemented();
+}
+
+void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, "==", false);
+}
+
+void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, "==", false);
+}
+
+void EmitFPOrdNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs,
+ [[maybe_unused]] std::string_view rhs) {
+ NotImplemented();
+}
+
+void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, "!=", true);
+}
+
+void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, "!=", true);
+}
+
+void EmitFPUnordNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs,
+ [[maybe_unused]] std::string_view rhs) {
+ NotImplemented();
+}
+
+void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, "!=", false);
+}
+
+void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, "!=", false);
+}
+
+void EmitFPOrdLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs,
+ [[maybe_unused]] std::string_view rhs) {
+ NotImplemented();
+}
+
+void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, "<", true);
+}
+
+void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, "<", true);
+}
+
+void EmitFPUnordLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs,
+ [[maybe_unused]] std::string_view rhs) {
+ NotImplemented();
+}
+
+void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, "<", false);
+}
+
+void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, "<", false);
+}
+
+void EmitFPOrdGreaterThan16([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] std::string_view lhs,
+ [[maybe_unused]] std::string_view rhs) {
+ NotImplemented();
+}
+
+void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, ">", true);
+}
+
+void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, ">", true);
+}
+
+void EmitFPUnordGreaterThan16([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] std::string_view lhs,
+ [[maybe_unused]] std::string_view rhs) {
+ NotImplemented();
+}
+
+void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, ">", false);
+}
+
+void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, ">", false);
+}
+
+void EmitFPOrdLessThanEqual16([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] std::string_view lhs,
+ [[maybe_unused]] std::string_view rhs) {
+ NotImplemented();
+}
+
+void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, "<=", true);
+}
+
+void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, "<=", true);
+}
+
+void EmitFPUnordLessThanEqual16([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] std::string_view lhs,
+ [[maybe_unused]] std::string_view rhs) {
+ NotImplemented();
+}
+
+void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, "<=", false);
+}
+
+void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, "<=", false);
+}
+
+void EmitFPOrdGreaterThanEqual16([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] std::string_view lhs,
+ [[maybe_unused]] std::string_view rhs) {
+ NotImplemented();
+}
+
+void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, ">=", true);
+}
+
+void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, ">=", true);
+}
+
+void EmitFPUnordGreaterThanEqual16([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] std::string_view lhs,
+ [[maybe_unused]] std::string_view rhs) {
+ NotImplemented();
+}
+
+void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, ">=", false);
+}
+
+void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, ">=", false);
+}
+
+void EmitFPIsNan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU1("{}=isnan({});", inst, value);
+}
+
+void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU1("{}=isnan({});", inst, value);
+}
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
new file mode 100644
index 000000000..447eb8e0a
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
@@ -0,0 +1,799 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/profile.h"
+
+namespace Shader::Backend::GLSL {
+namespace {
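+// Resolves the GLSL identifier ("tex{binding}" / "img{binding}") declared by the context for a
+// texture or image descriptor, appending an array subscript when the descriptor is arrayed.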
+std::string Texture(EmitContext& ctx, const IR::TextureInstInfo& info, const IR::Value& index) {
+ const auto def{info.type == TextureType::Buffer ? ctx.texture_buffers.at(info.descriptor_index)
+ : ctx.textures.at(info.descriptor_index)};
+ const auto index_offset{def.count > 1 ? fmt::format("[{}]", ctx.var_alloc.Consume(index)) : ""};
+ return fmt::format("tex{}{}", def.binding, index_offset);
+}
+
+std::string Image(EmitContext& ctx, const IR::TextureInstInfo& info, const IR::Value& index) {
+ const auto def{info.type == TextureType::Buffer ? ctx.image_buffers.at(info.descriptor_index)
+ : ctx.images.at(info.descriptor_index)};
+ const auto index_offset{def.count > 1 ? fmt::format("[{}]", ctx.var_alloc.Consume(index)) : ""};
+ return fmt::format("img{}{}", def.binding, index_offset);
+}
+
+std::string CastToIntVec(std::string_view value, const IR::TextureInstInfo& info) {
+ switch (info.type) {
+ case TextureType::Color1D:
+ case TextureType::Buffer:
+ return fmt::format("int({})", value);
+ case TextureType::ColorArray1D:
+ case TextureType::Color2D:
+ case TextureType::ColorArray2D:
+ return fmt::format("ivec2({})", value);
+ case TextureType::Color3D:
+ case TextureType::ColorCube:
+ return fmt::format("ivec3({})", value);
+ case TextureType::ColorArrayCube:
+ return fmt::format("ivec4({})", value);
+ default:
+ throw NotImplementedException("Integer cast for TextureType {}", info.type.Value());
+ }
+}
+
+std::string CoordsCastToInt(std::string_view value, const IR::TextureInstInfo& info) {
+ switch (info.type) {
+ case TextureType::Color1D:
+ case TextureType::Buffer:
+ return fmt::format("int({})", value);
+ case TextureType::ColorArray1D:
+ case TextureType::Color2D:
+ return fmt::format("ivec2({})", value);
+ case TextureType::ColorArray2D:
+ case TextureType::Color3D:
+ case TextureType::ColorCube:
+ return fmt::format("ivec3({})", value);
+ case TextureType::ColorArrayCube:
+ return fmt::format("ivec4({})", value);
+ default:
+ throw NotImplementedException("TexelFetchCast type {}", info.type.Value());
+ }
+}
+
+bool NeedsShadowLodExt(TextureType type) {
+ switch (type) {
+ case TextureType::ColorArray2D:
+ case TextureType::ColorCube:
+ case TextureType::ColorArrayCube:
+ return true;
+ default:
+ return false;
+ }
+}
+
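+// Folds an all-immediate offset into a constant ivec; otherwise consumes the runtime offset when
+// the device supports variable texture offsets, or stubs the offset with zero when it does not.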
+std::string GetOffsetVec(EmitContext& ctx, const IR::Value& offset) {
+ if (offset.IsImmediate()) {
+ return fmt::format("int({})", offset.U32());
+ }
+ IR::Inst* const inst{offset.InstRecursive()};
+ if (inst->AreAllArgsImmediates()) {
+ switch (inst->GetOpcode()) {
+ case IR::Opcode::CompositeConstructU32x2:
+ return fmt::format("ivec2({},{})", inst->Arg(0).U32(), inst->Arg(1).U32());
+ case IR::Opcode::CompositeConstructU32x3:
+ return fmt::format("ivec3({},{},{})", inst->Arg(0).U32(), inst->Arg(1).U32(),
+ inst->Arg(2).U32());
+ case IR::Opcode::CompositeConstructU32x4:
+ return fmt::format("ivec4({},{},{},{})", inst->Arg(0).U32(), inst->Arg(1).U32(),
+ inst->Arg(2).U32(), inst->Arg(3).U32());
+ default:
+ break;
+ }
+ }
+ const bool has_var_aoffi{ctx.profile.support_gl_variable_aoffi};
+ if (!has_var_aoffi) {
+ LOG_WARNING(Shader_GLSL, "Device does not support variable texture offsets, STUBBING");
+ }
+ const auto offset_str{has_var_aoffi ? ctx.var_alloc.Consume(offset) : "0"};
+ switch (offset.Type()) {
+ case IR::Type::U32:
+ return fmt::format("int({})", offset_str);
+ case IR::Type::U32x2:
+ return fmt::format("ivec2({})", offset_str);
+ case IR::Type::U32x3:
+ return fmt::format("ivec3({})", offset_str);
+ case IR::Type::U32x4:
+ return fmt::format("ivec4({})", offset_str);
+ default:
+ throw NotImplementedException("Offset type {}", offset.Type());
+ }
+}
+
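+// Builds the ivec2[4] array expected by textureGatherOffsets from the two PTP offset composites;
+// offsets that are not fully immediate are stubbed with placeholder values.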
+std::string PtpOffsets(const IR::Value& offset, const IR::Value& offset2) {
+ const std::array values{offset.InstRecursive(), offset2.InstRecursive()};
+ if (!values[0]->AreAllArgsImmediates() || !values[1]->AreAllArgsImmediates()) {
+ LOG_WARNING(Shader_GLSL, "Not all arguments in PTP are immediate, STUBBING");
+ return "ivec2[](ivec2(0), ivec2(1), ivec2(2), ivec2(3))";
+ }
+ const IR::Opcode opcode{values[0]->GetOpcode()};
+ if (opcode != values[1]->GetOpcode() || opcode != IR::Opcode::CompositeConstructU32x4) {
+ throw LogicError("Invalid PTP arguments");
+ }
+ auto read{[&](unsigned int a, unsigned int b) { return values[a]->Arg(b).U32(); }};
+
+ return fmt::format("ivec2[](ivec2({},{}),ivec2({},{}),ivec2({},{}),ivec2({},{}))", read(0, 0),
+ read(0, 1), read(0, 2), read(0, 3), read(1, 0), read(1, 1), read(1, 2),
+ read(1, 3));
+}
+
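+// Returns the GetSparseFromOp pseudo-instruction associated with a sample, invalidating it so the
+// emitter can write the residency result directly; returns null when no sparse query was made.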
+IR::Inst* PrepareSparse(IR::Inst& inst) {
+ const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+ if (sparse_inst) {
+ sparse_inst->Invalidate();
+ }
+ return sparse_inst;
+}
+} // Anonymous namespace
+
+void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view bias_lc,
+ const IR::Value& offset) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ if (info.has_lod_clamp) {
+ throw NotImplementedException("EmitImageSampleImplicitLod Lod clamp samples");
+ }
+ const auto texture{Texture(ctx, info, index)};
+ const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""};
+ const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const bool supports_sparse{ctx.profile.support_gl_sparse_textures};
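+ // Sparse residency was requested but cannot be queried on this device; report the texel as
+ // always resident and fall through to the regular sampling path.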
+ if (sparse_inst && !supports_sparse) {
+ LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING");
+ ctx.AddU1("{}=true;", *sparse_inst);
+ }
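+ // Regular texture path: implicit LOD and bias are only meaningful in fragment shaders, so other
+ // stages sample explicitly at LOD 0.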
+ if (!sparse_inst || !supports_sparse) {
+ if (!offset.IsEmpty()) {
+ const auto offset_str{GetOffsetVec(ctx, offset)};
+ if (ctx.stage == Stage::Fragment) {
+ ctx.Add("{}=textureOffset({},{},{}{});", texel, texture, coords, offset_str, bias);
+ } else {
+ ctx.Add("{}=textureLodOffset({},{},0.0,{});", texel, texture, coords, offset_str);
+ }
+ } else {
+ if (ctx.stage == Stage::Fragment) {
+ ctx.Add("{}=texture({},{}{});", texel, texture, coords, bias);
+ } else {
+ ctx.Add("{}=textureLod({},{},0.0);", texel, texture, coords);
+ }
+ }
+ return;
+ }
+ if (!offset.IsEmpty()) {
+ ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureOffsetARB({},{},{},{}{}));",
+ *sparse_inst, texture, coords, GetOffsetVec(ctx, offset), texel, bias);
+ } else {
+ ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureARB({},{},{}{}));", *sparse_inst,
+ texture, coords, texel, bias);
+ }
+}
+
+void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view lod_lc,
+ const IR::Value& offset) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ if (info.has_bias) {
+ throw NotImplementedException("EmitImageSampleExplicitLod Bias texture samples");
+ }
+ if (info.has_lod_clamp) {
+ throw NotImplementedException("EmitImageSampleExplicitLod Lod clamp samples");
+ }
+ const auto texture{Texture(ctx, info, index)};
+ const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const bool supports_sparse{ctx.profile.support_gl_sparse_textures};
+ if (sparse_inst && !supports_sparse) {
+ LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING");
+ ctx.AddU1("{}=true;", *sparse_inst);
+ }
+ if (!sparse_inst || !supports_sparse) {
+ if (!offset.IsEmpty()) {
+ ctx.Add("{}=textureLodOffset({},{},{},{});", texel, texture, coords, lod_lc,
+ GetOffsetVec(ctx, offset));
+ } else {
+ ctx.Add("{}=textureLod({},{},{});", texel, texture, coords, lod_lc);
+ }
+ return;
+ }
+ if (!offset.IsEmpty()) {
+ ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));",
+ *sparse_inst, texture, CastToIntVec(coords, info), lod_lc,
+ GetOffsetVec(ctx, offset), texel);
+ } else {
+ ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureLodARB({},{},{},{}));", *sparse_inst,
+ texture, coords, lod_lc, texel);
+ }
+}
+
+void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view dref,
+ std::string_view bias_lc, const IR::Value& offset) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto sparse_inst{PrepareSparse(inst)};
+ if (sparse_inst) {
+ throw NotImplementedException("EmitImageSampleDrefImplicitLod Sparse texture samples");
+ }
+ if (info.has_bias) {
+ throw NotImplementedException("EmitImageSampleDrefImplicitLod Bias texture samples");
+ }
+ if (info.has_lod_clamp) {
+ throw NotImplementedException("EmitImageSampleDrefImplicitLod Lod clamp samples");
+ }
+ const auto texture{Texture(ctx, info, index)};
+ const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""};
+ const bool needs_shadow_ext{NeedsShadowLodExt(info.type)};
+ const auto cast{needs_shadow_ext ? "vec4" : "vec3"};
+ const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod &&
+ ctx.stage != Stage::Fragment && needs_shadow_ext};
+ if (use_grad) {
+ LOG_WARNING(Shader_GLSL,
+ "Device lacks GL_EXT_texture_shadow_lod. Using textureGrad fallback");
+ if (info.type == TextureType::ColorArrayCube) {
+ LOG_WARNING(Shader_GLSL, "textureGrad does not support ColorArrayCube. Stubbing");
+ ctx.AddF32("{}=0.0f;", inst);
+ return;
+ }
+ const auto d_cast{info.type == TextureType::ColorArray2D ? "vec2" : "vec3"};
+ ctx.AddF32("{}=textureGrad({},{}({},{}),{}(0),{}(0));", inst, texture, cast, coords, dref,
+ d_cast, d_cast);
+ return;
+ }
+ if (!offset.IsEmpty()) {
+ const auto offset_str{GetOffsetVec(ctx, offset)};
+ if (ctx.stage == Stage::Fragment) {
+ ctx.AddF32("{}=textureOffset({},{}({},{}),{}{});", inst, texture, cast, coords, dref,
+ offset_str, bias);
+ } else {
+ ctx.AddF32("{}=textureLodOffset({},{}({},{}),0.0,{});", inst, texture, cast, coords,
+ dref, offset_str);
+ }
+ } else {
+ if (ctx.stage == Stage::Fragment) {
+ if (info.type == TextureType::ColorArrayCube) {
+ ctx.AddF32("{}=texture({},vec4({}),{});", inst, texture, coords, dref);
+ } else {
+ ctx.AddF32("{}=texture({},{}({},{}){});", inst, texture, cast, coords, dref, bias);
+ }
+ } else {
+ ctx.AddF32("{}=textureLod({},{}({},{}),0.0);", inst, texture, cast, coords, dref);
+ }
+ }
+}
+
+void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view dref,
+ std::string_view lod_lc, const IR::Value& offset) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto sparse_inst{PrepareSparse(inst)};
+ if (sparse_inst) {
+ throw NotImplementedException("EmitImageSampleDrefExplicitLod Sparse texture samples");
+ }
+ if (info.has_bias) {
+ throw NotImplementedException("EmitImageSampleDrefExplicitLod Bias texture samples");
+ }
+ if (info.has_lod_clamp) {
+ throw NotImplementedException("EmitImageSampleDrefExplicitLod Lod clamp samples");
+ }
+ const auto texture{Texture(ctx, info, index)};
+ const bool needs_shadow_ext{NeedsShadowLodExt(info.type)};
+ const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod && needs_shadow_ext};
+ const auto cast{needs_shadow_ext ? "vec4" : "vec3"};
+ if (use_grad) {
+ LOG_WARNING(Shader_GLSL,
+ "Device lacks GL_EXT_texture_shadow_lod. Using textureGrad fallback");
+ if (info.type == TextureType::ColorArrayCube) {
+ LOG_WARNING(Shader_GLSL, "textureGrad does not support ColorArrayCube. Stubbing");
+ ctx.AddF32("{}=0.0f;", inst);
+ return;
+ }
+ const auto d_cast{info.type == TextureType::ColorArray2D ? "vec2" : "vec3"};
+ ctx.AddF32("{}=textureGrad({},{}({},{}),{}(0),{}(0));", inst, texture, cast, coords, dref,
+ d_cast, d_cast);
+ return;
+ }
+ if (!offset.IsEmpty()) {
+ const auto offset_str{GetOffsetVec(ctx, offset)};
+ if (info.type == TextureType::ColorArrayCube) {
+ ctx.AddF32("{}=textureLodOffset({},{},{},{},{});", inst, texture, coords, dref, lod_lc,
+ offset_str);
+ } else {
+ ctx.AddF32("{}=textureLodOffset({},{}({},{}),{},{});", inst, texture, cast, coords,
+ dref, lod_lc, offset_str);
+ }
+ } else {
+ if (info.type == TextureType::ColorArrayCube) {
+ ctx.AddF32("{}=textureLod({},{},{},{});", inst, texture, coords, dref, lod_lc);
+ } else {
+ ctx.AddF32("{}=textureLod({},{}({},{}),{});", inst, texture, cast, coords, dref,
+ lod_lc);
+ }
+ }
+}
+
+void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, const IR::Value& offset, const IR::Value& offset2) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto texture{Texture(ctx, info, index)};
+ const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const bool supports_sparse{ctx.profile.support_gl_sparse_textures};
+ if (sparse_inst && !supports_sparse) {
+ LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING");
+ ctx.AddU1("{}=true;", *sparse_inst);
+ }
+ if (!sparse_inst || !supports_sparse) {
+ if (offset.IsEmpty()) {
+ ctx.Add("{}=textureGather({},{},int({}));", texel, texture, coords,
+ info.gather_component);
+ return;
+ }
+ if (offset2.IsEmpty()) {
+ ctx.Add("{}=textureGatherOffset({},{},{},int({}));", texel, texture, coords,
+ GetOffsetVec(ctx, offset), info.gather_component);
+ return;
+ }
+ // PTP
+ const auto offsets{PtpOffsets(offset, offset2)};
+ ctx.Add("{}=textureGatherOffsets({},{},{},int({}));", texel, texture, coords, offsets,
+ info.gather_component);
+ return;
+ }
+ if (offset.IsEmpty()) {
+ ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherARB({},{},{},int({})));",
+ *sparse_inst, texture, coords, texel, info.gather_component);
+ return;
+ }
+ if (offset2.IsEmpty()) {
+ ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},{},int({})));",
+ *sparse_inst, texture, CastToIntVec(coords, info), GetOffsetVec(ctx, offset),
+ texel, info.gather_component);
+ return;
+ }
+ // PTP
+ const auto offsets{PtpOffsets(offset, offset2)};
+ ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},{},int({})));",
+ *sparse_inst, texture, CastToIntVec(coords, info), offsets, texel,
+ info.gather_component);
+}
+
+void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, const IR::Value& offset, const IR::Value& offset2,
+ std::string_view dref) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto texture{Texture(ctx, info, index)};
+ const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const bool supports_sparse{ctx.profile.support_gl_sparse_textures};
+ if (sparse_inst && !supports_sparse) {
+ LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING");
+ ctx.AddU1("{}=true;", *sparse_inst);
+ }
+ if (!sparse_inst || !supports_sparse) {
+ if (offset.IsEmpty()) {
+ ctx.Add("{}=textureGather({},{},{});", texel, texture, coords, dref);
+ return;
+ }
+ if (offset2.IsEmpty()) {
+ ctx.Add("{}=textureGatherOffset({},{},{},{});", texel, texture, coords, dref,
+ GetOffsetVec(ctx, offset));
+ return;
+ }
+ // PTP
+ const auto offsets{PtpOffsets(offset, offset2)};
+ ctx.Add("{}=textureGatherOffsets({},{},{},{});", texel, texture, coords, dref, offsets);
+ return;
+ }
+ if (offset.IsEmpty()) {
+ ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherARB({},{},{},{}));", *sparse_inst,
+ texture, coords, dref, texel);
+ return;
+ }
+ if (offset2.IsEmpty()) {
+ ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},,{},{}));",
+ *sparse_inst, texture, CastToIntVec(coords, info), dref,
+ GetOffsetVec(ctx, offset), texel);
+ return;
+ }
+ // PTP
+ const auto offsets{PtpOffsets(offset, offset2)};
+ ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},,{},{}));",
+ *sparse_inst, texture, CastToIntVec(coords, info), dref, offsets, texel);
+}
+
+void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view offset, std::string_view lod,
+ [[maybe_unused]] std::string_view ms) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ if (info.has_bias) {
+ throw NotImplementedException("EmitImageFetch Bias texture samples");
+ }
+ if (info.has_lod_clamp) {
+ throw NotImplementedException("EmitImageFetch Lod clamp samples");
+ }
+ const auto texture{Texture(ctx, info, index)};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
+ const bool supports_sparse{ctx.profile.support_gl_sparse_textures};
+ if (sparse_inst && !supports_sparse) {
+ LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING");
+ ctx.AddU1("{}=true;", *sparse_inst);
+ }
+ if (!sparse_inst || !supports_sparse) {
+ if (!offset.empty()) {
+ ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture,
+ CoordsCastToInt(coords, info), lod, CoordsCastToInt(offset, info));
+ } else {
+ if (info.type == TextureType::Buffer) {
+ ctx.Add("{}=texelFetch({},int({}));", texel, texture, coords);
+ } else {
+ ctx.Add("{}=texelFetch({},{},int({}));", texel, texture,
+ CoordsCastToInt(coords, info), lod);
+ }
+ }
+ return;
+ }
+ if (!offset.empty()) {
+ ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));",
+ *sparse_inst, texture, CastToIntVec(coords, info), lod,
+ CastToIntVec(offset, info), texel);
+ } else {
+ ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchARB({},{},int({}),{}));",
+ *sparse_inst, texture, CastToIntVec(coords, info), lod, texel);
+ }
+}
+
+void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view lod) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto texture{Texture(ctx, info, index)};
+ switch (info.type) {
+ case TextureType::Color1D:
+ return ctx.AddU32x4(
+ "{}=uvec4(uint(textureSize({},int({}))),0u,0u,uint(textureQueryLevels({})));", inst,
+ texture, lod, texture);
+ case TextureType::ColorArray1D:
+ case TextureType::Color2D:
+ case TextureType::ColorCube:
+ return ctx.AddU32x4(
+ "{}=uvec4(uvec2(textureSize({},int({}))),0u,uint(textureQueryLevels({})));", inst,
+ texture, lod, texture);
+ case TextureType::ColorArray2D:
+ case TextureType::Color3D:
+ case TextureType::ColorArrayCube:
+ return ctx.AddU32x4(
+ "{}=uvec4(uvec3(textureSize({},int({}))),uint(textureQueryLevels({})));", inst, texture,
+ lod, texture);
+ case TextureType::Buffer:
+ throw NotImplementedException("EmitImageQueryDimensions Texture buffers");
+ }
+ throw LogicError("Unspecified image type {}", info.type.Value());
+}
+
+void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto texture{Texture(ctx, info, index)};
+ return ctx.AddF32x4("{}=vec4(textureQueryLod({},{}),0.0,0.0);", inst, texture, coords);
+}
+
+void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, const IR::Value& derivatives,
+ const IR::Value& offset, [[maybe_unused]] const IR::Value& lod_clamp) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ if (info.has_lod_clamp) {
+ throw NotImplementedException("EmitImageGradient Lod clamp samples");
+ }
+ const auto sparse_inst{PrepareSparse(inst)};
+ if (sparse_inst) {
+ throw NotImplementedException("EmitImageGradient Sparse");
+ }
+ if (!offset.IsEmpty()) {
+ throw NotImplementedException("EmitImageGradient offset");
+ }
+ const auto texture{Texture(ctx, info, index)};
+ const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
+ const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp};
+ const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)};
+ if (multi_component) {
+ ctx.Add("{}=textureGrad({},{},vec2({}.xz),vec2({}.yz));", texel, texture, coords,
+ derivatives_vec, derivatives_vec);
+ } else {
+ ctx.Add("{}=textureGrad({},{},float({}.x),float({}.y));", texel, texture, coords,
+ derivatives_vec, derivatives_vec);
+ }
+}
+
+void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto sparse_inst{PrepareSparse(inst)};
+ if (sparse_inst) {
+ throw NotImplementedException("EmitImageRead Sparse");
+ }
+ const auto image{Image(ctx, info, index)};
+ ctx.AddU32x4("{}=uvec4(imageLoad({},{}));", inst, image, CoordsCastToInt(coords, info));
+}
+
+void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view color) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto image{Image(ctx, info, index)};
+ ctx.Add("imageStore({},{},{});", image, CoordsCastToInt(coords, info), color);
+}
+
+void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto image{Image(ctx, info, index)};
+ ctx.AddU32("{}=imageAtomicAdd({},{},{});", inst, image, CoordsCastToInt(coords, info), value);
+}
+
+void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto image{Image(ctx, info, index)};
+ ctx.AddU32("{}=imageAtomicMin({},{},int({}));", inst, image, CoordsCastToInt(coords, info),
+ value);
+}
+
+void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto image{Image(ctx, info, index)};
+ ctx.AddU32("{}=imageAtomicMin({},{},uint({}));", inst, image, CoordsCastToInt(coords, info),
+ value);
+}
+
+void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto image{Image(ctx, info, index)};
+ ctx.AddU32("{}=imageAtomicMax({},{},int({}));", inst, image, CoordsCastToInt(coords, info),
+ value);
+}
+
+void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto image{Image(ctx, info, index)};
+ ctx.AddU32("{}=imageAtomicMax({},{},uint({}));", inst, image, CoordsCastToInt(coords, info),
+ value);
+}
+
+void EmitImageAtomicInc32(EmitContext&, IR::Inst&, const IR::Value&, std::string_view,
+ std::string_view) {
+ NotImplemented();
+}
+
+void EmitImageAtomicDec32(EmitContext&, IR::Inst&, const IR::Value&, std::string_view,
+ std::string_view) {
+ NotImplemented();
+}
+
+void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto image{Image(ctx, info, index)};
+ ctx.AddU32("{}=imageAtomicAnd({},{},{});", inst, image, CoordsCastToInt(coords, info), value);
+}
+
+void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto image{Image(ctx, info, index)};
+ ctx.AddU32("{}=imageAtomicOr({},{},{});", inst, image, CoordsCastToInt(coords, info), value);
+}
+
+void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto image{Image(ctx, info, index)};
+ ctx.AddU32("{}=imageAtomicXor({},{},{});", inst, image, CoordsCastToInt(coords, info), value);
+}
+
+void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto image{Image(ctx, info, index)};
+ ctx.AddU32("{}=imageAtomicExchange({},{},{});", inst, image, CoordsCastToInt(coords, info),
+ value);
+}
+
+void EmitBindlessImageSampleImplicitLod(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageSampleExplicitLod(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageGather(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageGatherDref(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageFetch(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageQueryDimensions(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageQueryLod(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageGradient(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageRead(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageWrite(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageSampleImplicitLod(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageSampleExplicitLod(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageSampleDrefImplicitLod(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageSampleDrefExplicitLod(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageGather(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageGatherDref(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageFetch(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageQueryDimensions(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageQueryLod(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageGradient(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageRead(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageWrite(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageAtomicIAdd32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageAtomicSMin32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageAtomicUMin32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageAtomicSMax32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageAtomicUMax32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageAtomicInc32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageAtomicDec32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageAtomicAnd32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageAtomicOr32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageAtomicXor32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageAtomicExchange32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageAtomicIAdd32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageAtomicSMin32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageAtomicUMin32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageAtomicSMax32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageAtomicUMax32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageAtomicInc32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageAtomicDec32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageAtomicAnd32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageAtomicOr32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageAtomicXor32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageAtomicExchange32(EmitContext&) {
+ NotImplemented();
+}
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
new file mode 100644
index 000000000..5936d086f
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
@@ -0,0 +1,702 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string_view>
+
+#include "common/common_types.h"
+
+namespace Shader::IR {
+enum class Attribute : u64;
+enum class Patch : u64;
+class Inst;
+class Value;
+} // namespace Shader::IR
+
+namespace Shader::Backend::GLSL {
+class EmitContext;
+
+#define NotImplemented() throw NotImplementedException("GLSL instruction {}", __func__)
+
+// Microinstruction emitters
+void EmitPhi(EmitContext& ctx, IR::Inst& inst);
+void EmitVoid(EmitContext& ctx);
+void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
+void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
+void EmitReference(EmitContext& ctx, const IR::Value& value);
+void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value);
+void EmitJoin(EmitContext& ctx);
+void EmitDemoteToHelperInvocation(EmitContext& ctx);
+void EmitBarrier(EmitContext& ctx);
+void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
+void EmitDeviceMemoryBarrier(EmitContext& ctx);
+void EmitPrologue(EmitContext& ctx);
+void EmitEpilogue(EmitContext& ctx);
+void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream);
+void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream);
+void EmitGetRegister(EmitContext& ctx);
+void EmitSetRegister(EmitContext& ctx);
+void EmitGetPred(EmitContext& ctx);
+void EmitSetPred(EmitContext& ctx);
+void EmitSetGotoVariable(EmitContext& ctx);
+void EmitGetGotoVariable(EmitContext& ctx);
+void EmitSetIndirectBranchVariable(EmitContext& ctx);
+void EmitGetIndirectBranchVariable(EmitContext& ctx);
+void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset);
+void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset);
+void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset);
+void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset);
+void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset);
+void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset);
+void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset);
+void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
+ std::string_view vertex);
+void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value,
+ std::string_view vertex);
+void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, std::string_view offset,
+ std::string_view vertex);
+void EmitSetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view value,
+ std::string_view vertex);
+void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch);
+void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value);
+void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value);
+void EmitSetSampleMask(EmitContext& ctx, std::string_view value);
+void EmitSetFragDepth(EmitContext& ctx, std::string_view value);
+void EmitGetZFlag(EmitContext& ctx);
+void EmitGetSFlag(EmitContext& ctx);
+void EmitGetCFlag(EmitContext& ctx);
+void EmitGetOFlag(EmitContext& ctx);
+void EmitSetZFlag(EmitContext& ctx);
+void EmitSetSFlag(EmitContext& ctx);
+void EmitSetCFlag(EmitContext& ctx);
+void EmitSetOFlag(EmitContext& ctx);
+void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst);
+void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst);
+void EmitInvocationId(EmitContext& ctx, IR::Inst& inst);
+void EmitSampleId(EmitContext& ctx, IR::Inst& inst);
+void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst);
+void EmitYDirection(EmitContext& ctx, IR::Inst& inst);
+void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset);
+void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value);
+void EmitUndefU1(EmitContext& ctx, IR::Inst& inst);
+void EmitUndefU8(EmitContext& ctx, IR::Inst& inst);
+void EmitUndefU16(EmitContext& ctx, IR::Inst& inst);
+void EmitUndefU32(EmitContext& ctx, IR::Inst& inst);
+void EmitUndefU64(EmitContext& ctx, IR::Inst& inst);
+void EmitLoadGlobalU8(EmitContext& ctx);
+void EmitLoadGlobalS8(EmitContext& ctx);
+void EmitLoadGlobalU16(EmitContext& ctx);
+void EmitLoadGlobalS16(EmitContext& ctx);
+void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, std::string_view address);
+void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, std::string_view address);
+void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, std::string_view address);
+void EmitWriteGlobalU8(EmitContext& ctx);
+void EmitWriteGlobalS8(EmitContext& ctx);
+void EmitWriteGlobalU16(EmitContext& ctx);
+void EmitWriteGlobalS16(EmitContext& ctx);
+void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value);
+void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value);
+void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value);
+void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset);
+void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset);
+void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset);
+void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset);
+void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset);
+void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset);
+void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset);
+void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ std::string_view value);
+void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ std::string_view value);
+void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ std::string_view value);
+void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ std::string_view value);
+void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ std::string_view value);
+void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ std::string_view value);
+void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ std::string_view value);
+void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, std::string_view offset);
+void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, std::string_view offset);
+void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, std::string_view offset);
+void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, std::string_view offset);
+void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, std::string_view offset);
+void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, std::string_view offset);
+void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, std::string_view offset);
+void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value);
+void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value);
+void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value);
+void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value);
+void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value);
+void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
+ std::string_view e2);
+void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
+ std::string_view e2, std::string_view e3);
+void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
+ std::string_view e2, std::string_view e3, std::string_view e4);
+void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ u32 index);
+void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ u32 index);
+void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ u32 index);
+void EmitCompositeInsertU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ std::string_view object, u32 index);
+void EmitCompositeInsertU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ std::string_view object, u32 index);
+void EmitCompositeInsertU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ std::string_view object, u32 index);
+void EmitCompositeConstructF16x2(EmitContext& ctx, std::string_view e1, std::string_view e2);
+void EmitCompositeConstructF16x3(EmitContext& ctx, std::string_view e1, std::string_view e2,
+ std::string_view e3);
+void EmitCompositeConstructF16x4(EmitContext& ctx, std::string_view e1, std::string_view e2,
+ std::string_view e3, std::string_view e4);
+void EmitCompositeExtractF16x2(EmitContext& ctx, std::string_view composite, u32 index);
+void EmitCompositeExtractF16x3(EmitContext& ctx, std::string_view composite, u32 index);
+void EmitCompositeExtractF16x4(EmitContext& ctx, std::string_view composite, u32 index);
+void EmitCompositeInsertF16x2(EmitContext& ctx, std::string_view composite, std::string_view object,
+ u32 index);
+void EmitCompositeInsertF16x3(EmitContext& ctx, std::string_view composite, std::string_view object,
+ u32 index);
+void EmitCompositeInsertF16x4(EmitContext& ctx, std::string_view composite, std::string_view object,
+ u32 index);
+void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
+ std::string_view e2);
+void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
+ std::string_view e2, std::string_view e3);
+void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
+ std::string_view e2, std::string_view e3, std::string_view e4);
+void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ u32 index);
+void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ u32 index);
+void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ u32 index);
+void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ std::string_view object, u32 index);
+void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ std::string_view object, u32 index);
+void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ std::string_view object, u32 index);
+void EmitCompositeConstructF64x2(EmitContext& ctx);
+void EmitCompositeConstructF64x3(EmitContext& ctx);
+void EmitCompositeConstructF64x4(EmitContext& ctx);
+void EmitCompositeExtractF64x2(EmitContext& ctx);
+void EmitCompositeExtractF64x3(EmitContext& ctx);
+void EmitCompositeExtractF64x4(EmitContext& ctx);
+void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object,
+ u32 index);
+void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object,
+ u32 index);
+void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object,
+ u32 index);
+void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
+ std::string_view true_value, std::string_view false_value);
+void EmitSelectU8(EmitContext& ctx, std::string_view cond, std::string_view true_value,
+ std::string_view false_value);
+void EmitSelectU16(EmitContext& ctx, std::string_view cond, std::string_view true_value,
+ std::string_view false_value);
+void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
+ std::string_view true_value, std::string_view false_value);
+void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
+ std::string_view true_value, std::string_view false_value);
+void EmitSelectF16(EmitContext& ctx, std::string_view cond, std::string_view true_value,
+ std::string_view false_value);
+void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
+ std::string_view true_value, std::string_view false_value);
+void EmitSelectF64(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
+ std::string_view true_value, std::string_view false_value);
+void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst);
+void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst);
+void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitPackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitUnpackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitGetZeroFromOp(EmitContext& ctx);
+void EmitGetSignFromOp(EmitContext& ctx);
+void EmitGetCarryFromOp(EmitContext& ctx);
+void EmitGetOverflowFromOp(EmitContext& ctx);
+void EmitGetSparseFromOp(EmitContext& ctx);
+void EmitGetInBoundsFromOp(EmitContext& ctx);
+void EmitFPAbs16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b,
+ std::string_view c);
+void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b,
+ std::string_view c);
+void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b,
+ std::string_view c);
+void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitFPNeg16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPSin(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPCos(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPRecip64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPRecipSqrt64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPSaturate16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPSaturate64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPClamp16(EmitContext& ctx, IR::Inst& inst, std::string_view value,
+ std::string_view min_value, std::string_view max_value);
+void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value,
+ std::string_view min_value, std::string_view max_value);
+void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, std::string_view value,
+ std::string_view min_value, std::string_view max_value);
+void EmitFPRoundEven16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPFloor16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPCeil16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPTrunc16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPOrdEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
+void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs);
+void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs);
+void EmitFPUnordEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
+void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPOrdNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
+void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPUnordNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
+void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPOrdLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
+void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPUnordLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
+void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPOrdGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
+void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPUnordGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
+void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPOrdLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
+void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPUnordLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
+void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
+void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
+void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPIsNan16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view shift);
+void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view shift);
+void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view shift);
+void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view shift);
+void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view shift);
+void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view shift);
+void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view insert, std::string_view offset, std::string_view count);
+void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view offset, std::string_view count);
+void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view offset, std::string_view count);
+void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min,
+ std::string_view max);
+void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min,
+ std::string_view max);
+void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs);
+void EmitULessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs);
+void EmitIEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs);
+void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs);
+void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs);
+void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs);
+void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value);
+void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value);
+void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value);
+void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value);
+void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value);
+void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value);
+void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value);
+void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value);
+void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value);
+void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value);
+void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value);
+void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value);
+void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitGlobalAtomicIAdd32(EmitContext& ctx);
+void EmitGlobalAtomicSMin32(EmitContext& ctx);
+void EmitGlobalAtomicUMin32(EmitContext& ctx);
+void EmitGlobalAtomicSMax32(EmitContext& ctx);
+void EmitGlobalAtomicUMax32(EmitContext& ctx);
+void EmitGlobalAtomicInc32(EmitContext& ctx);
+void EmitGlobalAtomicDec32(EmitContext& ctx);
+void EmitGlobalAtomicAnd32(EmitContext& ctx);
+void EmitGlobalAtomicOr32(EmitContext& ctx);
+void EmitGlobalAtomicXor32(EmitContext& ctx);
+void EmitGlobalAtomicExchange32(EmitContext& ctx);
+void EmitGlobalAtomicIAdd64(EmitContext& ctx);
+void EmitGlobalAtomicSMin64(EmitContext& ctx);
+void EmitGlobalAtomicUMin64(EmitContext& ctx);
+void EmitGlobalAtomicSMax64(EmitContext& ctx);
+void EmitGlobalAtomicUMax64(EmitContext& ctx);
+void EmitGlobalAtomicInc64(EmitContext& ctx);
+void EmitGlobalAtomicDec64(EmitContext& ctx);
+void EmitGlobalAtomicAnd64(EmitContext& ctx);
+void EmitGlobalAtomicOr64(EmitContext& ctx);
+void EmitGlobalAtomicXor64(EmitContext& ctx);
+void EmitGlobalAtomicExchange64(EmitContext& ctx);
+void EmitGlobalAtomicAddF32(EmitContext& ctx);
+void EmitGlobalAtomicAddF16x2(EmitContext& ctx);
+void EmitGlobalAtomicAddF32x2(EmitContext& ctx);
+void EmitGlobalAtomicMinF16x2(EmitContext& ctx);
+void EmitGlobalAtomicMinF32x2(EmitContext& ctx);
+void EmitGlobalAtomicMaxF16x2(EmitContext& ctx);
+void EmitGlobalAtomicMaxF32x2(EmitContext& ctx);
+void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitBindlessImageSampleImplicitLod(EmitContext&);
+void EmitBindlessImageSampleExplicitLod(EmitContext&);
+void EmitBindlessImageSampleDrefImplicitLod(EmitContext&);
+void EmitBindlessImageSampleDrefExplicitLod(EmitContext&);
+void EmitBindlessImageGather(EmitContext&);
+void EmitBindlessImageGatherDref(EmitContext&);
+void EmitBindlessImageFetch(EmitContext&);
+void EmitBindlessImageQueryDimensions(EmitContext&);
+void EmitBindlessImageQueryLod(EmitContext&);
+void EmitBindlessImageGradient(EmitContext&);
+void EmitBindlessImageRead(EmitContext&);
+void EmitBindlessImageWrite(EmitContext&);
+void EmitBoundImageSampleImplicitLod(EmitContext&);
+void EmitBoundImageSampleExplicitLod(EmitContext&);
+void EmitBoundImageSampleDrefImplicitLod(EmitContext&);
+void EmitBoundImageSampleDrefExplicitLod(EmitContext&);
+void EmitBoundImageGather(EmitContext&);
+void EmitBoundImageGatherDref(EmitContext&);
+void EmitBoundImageFetch(EmitContext&);
+void EmitBoundImageQueryDimensions(EmitContext&);
+void EmitBoundImageQueryLod(EmitContext&);
+void EmitBoundImageGradient(EmitContext&);
+void EmitBoundImageRead(EmitContext&);
+void EmitBoundImageWrite(EmitContext&);
+void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view bias_lc,
+ const IR::Value& offset);
+void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view lod_lc,
+ const IR::Value& offset);
+void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view dref,
+ std::string_view bias_lc, const IR::Value& offset);
+void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view dref,
+ std::string_view lod_lc, const IR::Value& offset);
+void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, const IR::Value& offset, const IR::Value& offset2);
+void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, const IR::Value& offset, const IR::Value& offset2,
+ std::string_view dref);
+void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view offset, std::string_view lod,
+ std::string_view ms);
+void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view lod);
+void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords);
+void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, const IR::Value& derivatives,
+ const IR::Value& offset, const IR::Value& lod_clamp);
+void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords);
+void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view color);
+void EmitBindlessImageAtomicIAdd32(EmitContext&);
+void EmitBindlessImageAtomicSMin32(EmitContext&);
+void EmitBindlessImageAtomicUMin32(EmitContext&);
+void EmitBindlessImageAtomicSMax32(EmitContext&);
+void EmitBindlessImageAtomicUMax32(EmitContext&);
+void EmitBindlessImageAtomicInc32(EmitContext&);
+void EmitBindlessImageAtomicDec32(EmitContext&);
+void EmitBindlessImageAtomicAnd32(EmitContext&);
+void EmitBindlessImageAtomicOr32(EmitContext&);
+void EmitBindlessImageAtomicXor32(EmitContext&);
+void EmitBindlessImageAtomicExchange32(EmitContext&);
+void EmitBoundImageAtomicIAdd32(EmitContext&);
+void EmitBoundImageAtomicSMin32(EmitContext&);
+void EmitBoundImageAtomicUMin32(EmitContext&);
+void EmitBoundImageAtomicSMax32(EmitContext&);
+void EmitBoundImageAtomicUMax32(EmitContext&);
+void EmitBoundImageAtomicInc32(EmitContext&);
+void EmitBoundImageAtomicDec32(EmitContext&);
+void EmitBoundImageAtomicAnd32(EmitContext&);
+void EmitBoundImageAtomicOr32(EmitContext&);
+void EmitBoundImageAtomicXor32(EmitContext&);
+void EmitBoundImageAtomicExchange32(EmitContext&);
+void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value);
+void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value);
+void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value);
+void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value);
+void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value);
+void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value);
+void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value);
+void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value);
+void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value);
+void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value);
+void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value);
+void EmitLaneId(EmitContext& ctx, IR::Inst& inst);
+void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred);
+void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred);
+void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred);
+void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred);
+void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst);
+void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst);
+void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst);
+void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst);
+void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst);
+void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value,
+ std::string_view index, std::string_view clamp,
+ std::string_view segmentation_mask);
+void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index,
+ std::string_view clamp, std::string_view segmentation_mask);
+void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value,
+ std::string_view index, std::string_view clamp,
+ std::string_view segmentation_mask);
+void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value,
+ std::string_view index, std::string_view clamp,
+ std::string_view segmentation_mask);
+void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, std::string_view op_a, std::string_view op_b,
+ std::string_view swizzle);
+void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a);
+void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a);
+void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a);
+void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a);
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp
new file mode 100644
index 000000000..38419f88f
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp
@@ -0,0 +1,253 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <limits>
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLSL {
+namespace {
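+// Helpers that forward the zero/sign condition codes of a result to the
+// GetZeroFromOp/GetSignFromOp pseudo-operations, when the IR requests them.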
+void SetZeroFlag(EmitContext& ctx, IR::Inst& inst, std::string_view result) {
+ IR::Inst* const zero{inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)};
+ if (!zero) {
+ return;
+ }
+ ctx.AddU1("{}={}==0;", *zero, result);
+ zero->Invalidate();
+}
+
+void SetSignFlag(EmitContext& ctx, IR::Inst& inst, std::string_view result) {
+ IR::Inst* const sign{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)};
+ if (!sign) {
+ return;
+ }
+ ctx.AddU1("{}=int({})<0;", *sign, result);
+ sign->Invalidate();
+}
+
+void BitwiseLogicalOp(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b,
+ char lop) {
+ const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)};
+ ctx.Add("{}={}{}{};", result, a, lop, b);
+ SetZeroFlag(ctx, inst, result);
+ SetSignFlag(ctx, inst, result);
+}
+} // Anonymous namespace
+
+void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ // Compute the overflow CC first as it requires the original operand values,
+ // which may be overwritten by the result of the addition
+ if (IR::Inst * overflow{inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)}) {
+ // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c
+ constexpr u32 s32_max{static_cast<u32>(std::numeric_limits<s32>::max())};
+ const auto sub_a{fmt::format("{}u-{}", s32_max, a)};
+ const auto positive_result{fmt::format("int({})>int({})", b, sub_a)};
+ const auto negative_result{fmt::format("int({})<int({})", b, sub_a)};
+ ctx.AddU1("{}=int({})>=0?{}:{};", *overflow, a, positive_result, negative_result);
+ overflow->Invalidate();
+ }
+ const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)};
+ if (IR::Inst* const carry{inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp)}) {
+ ctx.uses_cc_carry = true;
+ ctx.Add("{}=uaddCarry({},{},carry);", result, a, b);
+ ctx.AddU1("{}=carry!=0;", *carry);
+ carry->Invalidate();
+ } else {
+ ctx.Add("{}={}+{};", result, a, b);
+ }
+ SetZeroFlag(ctx, inst, result);
+ SetSignFlag(ctx, inst, result);
+}
+
+void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ ctx.AddU64("{}={}+{};", inst, a, b);
+}
+
+void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ ctx.AddU32("{}={}-{};", inst, a, b);
+}
+
+void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ ctx.AddU64("{}={}-{};", inst, a, b);
+}
+
+void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ ctx.AddU32("{}=uint({}*{});", inst, a, b);
+}
+
+void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32("{}=uint(-({}));", inst, value);
+}
+
+void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU64("{}=-({});", inst, value);
+}
+
+void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32("{}=abs(int({}));", inst, value);
+}
+
+void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view shift) {
+ ctx.AddU32("{}={}<<{};", inst, base, shift);
+}
+
+void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view shift) {
+ ctx.AddU64("{}={}<<{};", inst, base, shift);
+}
+
+void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view shift) {
+ ctx.AddU32("{}={}>>{};", inst, base, shift);
+}
+
+void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view shift) {
+ ctx.AddU64("{}={}>>{};", inst, base, shift);
+}
+
+void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view shift) {
+ ctx.AddU32("{}=int({})>>{};", inst, base, shift);
+}
+
+void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view shift) {
+ ctx.AddU64("{}=int64_t({})>>{};", inst, base, shift);
+}
+
+void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ BitwiseLogicalOp(ctx, inst, a, b, '&');
+}
+
+void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ BitwiseLogicalOp(ctx, inst, a, b, '|');
+}
+
+void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ BitwiseLogicalOp(ctx, inst, a, b, '^');
+}
+
+void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view insert, std::string_view offset, std::string_view count) {
+ ctx.AddU32("{}=bitfieldInsert({},{},int({}),int({}));", inst, base, insert, offset, count);
+}
+
+void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view offset, std::string_view count) {
+ const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)};
+ ctx.Add("{}=uint(bitfieldExtract(int({}),int({}),int({})));", result, base, offset, count);
+ SetZeroFlag(ctx, inst, result);
+ SetSignFlag(ctx, inst, result);
+}
+
+void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view offset, std::string_view count) {
+ const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)};
+ ctx.Add("{}=uint(bitfieldExtract(uint({}),int({}),int({})));", result, base, offset, count);
+ SetZeroFlag(ctx, inst, result);
+ SetSignFlag(ctx, inst, result);
+}
+
+void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32("{}=bitfieldReverse({});", inst, value);
+}
+
+void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32("{}=bitCount({});", inst, value);
+}
+
+void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32("{}=~{};", inst, value);
+}
+
+void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32("{}=findMSB(int({}));", inst, value);
+}
+
+void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32("{}=findMSB(uint({}));", inst, value);
+}
+
+void EmitSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ ctx.AddU32("{}=min(int({}),int({}));", inst, a, b);
+}
+
+void EmitUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ ctx.AddU32("{}=min(uint({}),uint({}));", inst, a, b);
+}
+
+void EmitSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ ctx.AddU32("{}=max(int({}),int({}));", inst, a, b);
+}
+
+void EmitUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ ctx.AddU32("{}=max(uint({}),uint({}));", inst, a, b);
+}
+
+void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min,
+ std::string_view max) {
+ const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)};
+ ctx.Add("{}=clamp(int({}),int({}),int({}));", result, value, min, max);
+ SetZeroFlag(ctx, inst, result);
+ SetSignFlag(ctx, inst, result);
+}
+
+void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min,
+ std::string_view max) {
+ const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)};
+ ctx.Add("{}=clamp(uint({}),uint({}),uint({}));", result, value, min, max);
+ SetZeroFlag(ctx, inst, result);
+ SetSignFlag(ctx, inst, result);
+}
+
+void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) {
+ ctx.AddU1("{}=int({})<int({});", inst, lhs, rhs);
+}
+
+void EmitULessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) {
+ ctx.AddU1("{}=uint({})<uint({});", inst, lhs, rhs);
+}
+
+void EmitIEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) {
+ ctx.AddU1("{}={}=={};", inst, lhs, rhs);
+}
+
+void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ ctx.AddU1("{}=int({})<=int({});", inst, lhs, rhs);
+}
+
+void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ ctx.AddU1("{}=uint({})<=uint({});", inst, lhs, rhs);
+}
+
+void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ ctx.AddU1("{}=int({})>int({});", inst, lhs, rhs);
+}
+
+void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ ctx.AddU1("{}=uint({})>uint({});", inst, lhs, rhs);
+}
+
+void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) {
+ ctx.AddU1("{}={}!={};", inst, lhs, rhs);
+}
+
+void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ ctx.AddU1("{}=int({})>=int({});", inst, lhs, rhs);
+}
+
+void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ ctx.AddU1("{}=uint({})>=uint({});", inst, lhs, rhs);
+}
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp
new file mode 100644
index 000000000..338ff4bd6
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp
@@ -0,0 +1,28 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLSL {
+
+void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ ctx.AddU1("{}={}||{};", inst, a, b);
+}
+
+void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ ctx.AddU1("{}={}&&{};", inst, a, b);
+}
+
+void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ ctx.AddU1("{}={}^^{};", inst, a, b);
+}
+
+void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU1("{}=!{};", inst, value);
+}
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp
new file mode 100644
index 000000000..e3957491f
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp
@@ -0,0 +1,202 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/profile.h"
+
+namespace Shader::Backend::GLSL {
+namespace {
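+// Compare-and-swap loop used for 8-bit and 16-bit SSBO stores: the containing
+// 32-bit word is re-read and re-inserted until atomicCompSwap succeeds.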
+constexpr char cas_loop[]{"for(;;){{uint old_value={};uint "
+ "cas_result=atomicCompSwap({},old_value,bitfieldInsert({},{},{},{}));"
+ "if(cas_result==old_value){{break;}}}}"};
+
+void SsboWriteCas(EmitContext& ctx, const IR::Value& binding, std::string_view offset_var,
+ std::string_view value, std::string_view bit_offset, u32 num_bits) {
+ const auto ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(), offset_var)};
+ ctx.Add(cas_loop, ssbo, ssbo, ssbo, value, bit_offset, num_bits);
+}
+} // Anonymous namespace
+
+void EmitLoadGlobalU8(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitLoadGlobalS8(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitLoadGlobalU16(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitLoadGlobalS16(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, std::string_view address) {
+ if (ctx.profile.support_int64) {
+ return ctx.AddU32("{}=LoadGlobal32({});", inst, address);
+ }
+ LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation");
+ ctx.AddU32("{}=0u;", inst);
+}
+
+void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, std::string_view address) {
+ if (ctx.profile.support_int64) {
+ return ctx.AddU32x2("{}=LoadGlobal64({});", inst, address);
+ }
+ LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation");
+ ctx.AddU32x2("{}=uvec2(0);", inst);
+}
+
+void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, std::string_view address) {
+ if (ctx.profile.support_int64) {
+ return ctx.AddU32x4("{}=LoadGlobal128({});", inst, address);
+ }
+ LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation");
+ ctx.AddU32x4("{}=uvec4(0);", inst);
+}
+
+void EmitWriteGlobalU8(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitWriteGlobalS8(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitWriteGlobalU16(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitWriteGlobalS16(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value) {
+ if (ctx.profile.support_int64) {
+ return ctx.Add("WriteGlobal32({},{});", address, value);
+ }
+ LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation");
+}
+
+void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value) {
+ if (ctx.profile.support_int64) {
+ return ctx.Add("WriteGlobal64({},{});", address, value);
+ }
+ LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation");
+}
+
+void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value) {
+ if (ctx.profile.support_int64) {
+ return ctx.Add("WriteGlobal128({},{});", address, value);
+ }
+ LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation");
+}
+
+void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset) {
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
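+    // Extract the addressed byte from the 32-bit SSBO word that holds it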
+ ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}>>2],int({}%4)*8,8);", inst, ctx.stage_name,
+ binding.U32(), offset_var, offset_var);
+}
+
+void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset) {
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ ctx.AddU32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int({}%4)*8,8);", inst, ctx.stage_name,
+ binding.U32(), offset_var, offset_var);
+}
+
+void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset) {
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}>>2],int(({}>>1)%2)*16,16);", inst, ctx.stage_name,
+ binding.U32(), offset_var, offset_var);
+}
+
+void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset) {
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ ctx.AddU32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int(({}>>1)%2)*16,16);", inst,
+ ctx.stage_name, binding.U32(), offset_var, offset_var);
+}
+
+void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset) {
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ ctx.AddU32("{}={}_ssbo{}[{}>>2];", inst, ctx.stage_name, binding.U32(), offset_var);
+}
+
+void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset) {
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2]);", inst, ctx.stage_name,
+ binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var);
+}
+
+void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset) {
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ ctx.AddU32x4("{}=uvec4({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2],{}_ssbo{}[({}+8)>>2],{}_ssbo{}[({}"
+ "+12)>>2]);",
+ inst, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(),
+ offset_var, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name,
+ binding.U32(), offset_var);
+}
+
+void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ std::string_view value) {
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ const auto bit_offset{fmt::format("int({}%4)*8", offset_var)};
+ SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 8);
+}
+
+void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ std::string_view value) {
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ const auto bit_offset{fmt::format("int({}%4)*8", offset_var)};
+ SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 8);
+}
+
+void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ std::string_view value) {
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset_var)};
+ SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 16);
+}
+
+void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ std::string_view value) {
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset_var)};
+ SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 16);
+}
+
+void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ std::string_view value) {
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ ctx.Add("{}_ssbo{}[{}>>2]={};", ctx.stage_name, binding.U32(), offset_var, value);
+}
+
+void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ std::string_view value) {
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ ctx.Add("{}_ssbo{}[{}>>2]={}.x;", ctx.stage_name, binding.U32(), offset_var, value);
+ ctx.Add("{}_ssbo{}[({}+4)>>2]={}.y;", ctx.stage_name, binding.U32(), offset_var, value);
+}
+
+void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ std::string_view value) {
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ ctx.Add("{}_ssbo{}[{}>>2]={}.x;", ctx.stage_name, binding.U32(), offset_var, value);
+ ctx.Add("{}_ssbo{}[({}+4)>>2]={}.y;", ctx.stage_name, binding.U32(), offset_var, value);
+ ctx.Add("{}_ssbo{}[({}+8)>>2]={}.z;", ctx.stage_name, binding.U32(), offset_var, value);
+ ctx.Add("{}_ssbo{}[({}+12)>>2]={}.w;", ctx.stage_name, binding.U32(), offset_var, value);
+}
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp
new file mode 100644
index 000000000..f420fe388
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp
@@ -0,0 +1,105 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+#ifdef _MSC_VER
+#pragma warning(disable : 4100)
+#endif
+
+namespace Shader::Backend::GLSL {
+
+void EmitGetRegister(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetRegister(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetPred(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetPred(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetGotoVariable(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetGotoVariable(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetIndirectBranchVariable(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetIndirectBranchVariable(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetZFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetSFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetCFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetOFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetZFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetSFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetCFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetOFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetZeroFromOp(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetSignFromOp(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetCarryFromOp(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetOverflowFromOp(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetSparseFromOp(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetInBoundsFromOp(EmitContext& ctx) {
+ NotImplemented();
+}
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp
new file mode 100644
index 000000000..49fba9073
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp
@@ -0,0 +1,55 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLSL {
+void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
+ std::string_view true_value, std::string_view false_value) {
+ ctx.AddU1("{}={}?{}:{};", inst, cond, true_value, false_value);
+}
+
+void EmitSelectU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond,
+ [[maybe_unused]] std::string_view true_value,
+ [[maybe_unused]] std::string_view false_value) {
+ NotImplemented();
+}
+
+void EmitSelectU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond,
+ [[maybe_unused]] std::string_view true_value,
+ [[maybe_unused]] std::string_view false_value) {
+ NotImplemented();
+}
+
+void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
+ std::string_view true_value, std::string_view false_value) {
+ ctx.AddU32("{}={}?{}:{};", inst, cond, true_value, false_value);
+}
+
+void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
+ std::string_view true_value, std::string_view false_value) {
+ ctx.AddU64("{}={}?{}:{};", inst, cond, true_value, false_value);
+}
+
+void EmitSelectF16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond,
+ [[maybe_unused]] std::string_view true_value,
+ [[maybe_unused]] std::string_view false_value) {
+ NotImplemented();
+}
+
+void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
+ std::string_view true_value, std::string_view false_value) {
+ ctx.AddF32("{}={}?{}:{};", inst, cond, true_value, false_value);
+}
+
+void EmitSelectF64(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
+ std::string_view true_value, std::string_view false_value) {
+ ctx.AddF64("{}={}?{}:{};", inst, cond, true_value, false_value);
+}
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp
new file mode 100644
index 000000000..518b78f06
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp
@@ -0,0 +1,79 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLSL {
+namespace {
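+// Same compare-and-swap loop as the SSBO path, used so that 8-bit and 16-bit
+// shared memory stores do not clobber the rest of the containing 32-bit word.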
+constexpr char cas_loop[]{"for(;;){{uint old_value={};uint "
+ "cas_result=atomicCompSwap({},old_value,bitfieldInsert({},{},{},{}));"
+ "if(cas_result==old_value){{break;}}}}"};
+
+void SharedWriteCas(EmitContext& ctx, std::string_view offset, std::string_view value,
+ std::string_view bit_offset, u32 num_bits) {
+ const auto smem{fmt::format("smem[{}>>2]", offset)};
+ ctx.Add(cas_loop, smem, smem, smem, value, bit_offset, num_bits);
+}
+} // Anonymous namespace
+
+void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, std::string_view offset) {
+ ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int({}%4)*8,8);", inst, offset, offset);
+}
+
+void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, std::string_view offset) {
+ ctx.AddU32("{}=bitfieldExtract(int(smem[{}>>2]),int({}%4)*8,8);", inst, offset, offset);
+}
+
+void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, std::string_view offset) {
+ ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int(({}>>1)%2)*16,16);", inst, offset, offset);
+}
+
+void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, std::string_view offset) {
+ ctx.AddU32("{}=bitfieldExtract(int(smem[{}>>2]),int(({}>>1)%2)*16,16);", inst, offset, offset);
+}
+
+void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, std::string_view offset) {
+ ctx.AddU32("{}=smem[{}>>2];", inst, offset);
+}
+
+void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, std::string_view offset) {
+ ctx.AddU32x2("{}=uvec2(smem[{}>>2],smem[({}+4)>>2]);", inst, offset, offset);
+}
+
+void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, std::string_view offset) {
+ ctx.AddU32x4("{}=uvec4(smem[{}>>2],smem[({}+4)>>2],smem[({}+8)>>2],smem[({}+12)>>2]);", inst,
+ offset, offset, offset, offset);
+}
+
+void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value) {
+ const auto bit_offset{fmt::format("int({}%4)*8", offset)};
+ SharedWriteCas(ctx, offset, value, bit_offset, 8);
+}
+
+void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value) {
+ const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset)};
+ SharedWriteCas(ctx, offset, value, bit_offset, 16);
+}
+
+void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value) {
+ ctx.Add("smem[{}>>2]={};", offset, value);
+}
+
+void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value) {
+ ctx.Add("smem[{}>>2]={}.x;", offset, value);
+ ctx.Add("smem[({}+4)>>2]={}.y;", offset, value);
+}
+
+void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value) {
+ ctx.Add("smem[{}>>2]={}.x;", offset, value);
+ ctx.Add("smem[({}+4)>>2]={}.y;", offset, value);
+ ctx.Add("smem[({}+8)>>2]={}.z;", offset, value);
+ ctx.Add("smem[({}+12)>>2]={}.w;", offset, value);
+}
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp
new file mode 100644
index 000000000..9b866f889
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp
@@ -0,0 +1,111 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/profile.h"
+
+namespace Shader::Backend::GLSL {
+namespace {
+std::string_view OutputVertexIndex(EmitContext& ctx) {
+ return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : "";
+}
+
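+// Give the position and the generic outputs written by the shader a defined
+// initial value: zero components, with the w component set to 1.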
+void InitializeOutputVaryings(EmitContext& ctx) {
+ if (ctx.uses_geometry_passthrough) {
+ return;
+ }
+ if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) {
+ ctx.Add("gl_Position=vec4(0,0,0,1);");
+ }
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ if (!ctx.info.stores.Generic(index)) {
+ continue;
+ }
+ const auto& info_array{ctx.output_generics.at(index)};
+ const auto output_decorator{OutputVertexIndex(ctx)};
+ size_t element{};
+ while (element < info_array.size()) {
+ const auto& info{info_array.at(element)};
+ const auto varying_name{fmt::format("{}{}", info.name, output_decorator)};
+ switch (info.num_components) {
+ case 1: {
+ const char value{element == 3 ? '1' : '0'};
+ ctx.Add("{}={}.f;", varying_name, value);
+ break;
+ }
+ case 2:
+ case 3:
+ if (element + info.num_components < 4) {
+ ctx.Add("{}=vec{}(0);", varying_name, info.num_components);
+ } else {
+                    // The last element is the w component, which must be initialized to 1
+ const auto zeros{info.num_components == 3 ? "0,0," : "0,"};
+ ctx.Add("{}=vec{}({}1);", varying_name, info.num_components, zeros);
+ }
+ break;
+ case 4:
+ ctx.Add("{}=vec4(0,0,0,1);", varying_name);
+ break;
+ default:
+ break;
+ }
+ element += info.num_components;
+ }
+ }
+}
+} // Anonymous namespace
+
+void EmitPhi(EmitContext& ctx, IR::Inst& phi) {
+ const size_t num_args{phi.NumArgs()};
+ for (size_t i = 0; i < num_args; ++i) {
+ ctx.var_alloc.Consume(phi.Arg(i));
+ }
+ if (!phi.Definition<Id>().is_valid) {
+ // The phi node wasn't forward defined
+ ctx.var_alloc.PhiDefine(phi, phi.Arg(0).Type());
+ }
+}
+
+void EmitVoid(EmitContext&) {}
+
+void EmitReference(EmitContext& ctx, const IR::Value& value) {
+ ctx.var_alloc.Consume(value);
+}
+
+void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& value) {
+ IR::Inst& phi{*phi_value.InstRecursive()};
+ const auto phi_type{phi.Arg(0).Type()};
+ if (!phi.Definition<Id>().is_valid) {
+ // The phi node wasn't forward defined
+ ctx.var_alloc.PhiDefine(phi, phi_type);
+ }
+ const auto phi_reg{ctx.var_alloc.Consume(IR::Value{&phi})};
+ const auto val_reg{ctx.var_alloc.Consume(value)};
+ if (phi_reg == val_reg) {
+ return;
+ }
+ ctx.Add("{}={};", phi_reg, val_reg);
+}
+
+void EmitPrologue(EmitContext& ctx) {
+ InitializeOutputVaryings(ctx);
+}
+
+void EmitEpilogue(EmitContext&) {}
+
+void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) {
+ ctx.Add("EmitStreamVertex(int({}));", ctx.var_alloc.Consume(stream));
+ InitializeOutputVaryings(ctx);
+}
+
+void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) {
+ ctx.Add("EndStreamPrimitive(int({}));", ctx.var_alloc.Consume(stream));
+}
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp
new file mode 100644
index 000000000..15bf02dd6
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp
@@ -0,0 +1,32 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+
+namespace Shader::Backend::GLSL {
+
+void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) {
+ ctx.AddU1("{}=false;", inst);
+}
+
+void EmitUndefU8(EmitContext& ctx, IR::Inst& inst) {
+ ctx.AddU32("{}=0u;", inst);
+}
+
+void EmitUndefU16(EmitContext& ctx, IR::Inst& inst) {
+ ctx.AddU32("{}=0u;", inst);
+}
+
+void EmitUndefU32(EmitContext& ctx, IR::Inst& inst) {
+ ctx.AddU32("{}=0u;", inst);
+}
+
+void EmitUndefU64(EmitContext& ctx, IR::Inst& inst) {
+ ctx.AddU64("{}=0u;", inst);
+}
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
new file mode 100644
index 000000000..a982dd8a2
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
@@ -0,0 +1,217 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/profile.h"
+
+namespace Shader::Backend::GLSL {
+namespace {
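+// Forwards the shfl_in_bounds result of a shuffle to the GetInBoundsFromOp
+// pseudo-operation, if the IR requested it.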
+void SetInBoundsFlag(EmitContext& ctx, IR::Inst& inst) {
+ IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)};
+ if (!in_bounds) {
+ return;
+ }
+ ctx.AddU1("{}=shfl_in_bounds;", *in_bounds);
+ in_bounds->Invalidate();
+}
+
+std::string ComputeMinThreadId(std::string_view thread_id, std::string_view segmentation_mask) {
+ return fmt::format("({}&{})", thread_id, segmentation_mask);
+}
+
+std::string ComputeMaxThreadId(std::string_view min_thread_id, std::string_view clamp,
+ std::string_view not_seg_mask) {
+ return fmt::format("({})|({}&{})", min_thread_id, clamp, not_seg_mask);
+}
+
+std::string GetMaxThreadId(std::string_view thread_id, std::string_view clamp,
+ std::string_view segmentation_mask) {
+ const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)};
+ const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)};
+ return ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask);
+}
+
+void UseShuffleNv(EmitContext& ctx, IR::Inst& inst, std::string_view shfl_op,
+ std::string_view value, std::string_view index,
+ [[maybe_unused]] std::string_view clamp, std::string_view segmentation_mask) {
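+    // Segment width for the NV shuffle intrinsics: 32 halved once per bit set in
+    // the segmentation mask.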
+ const auto width{fmt::format("32u>>(bitCount({}&31u))", segmentation_mask)};
+ ctx.AddU32("{}={}({},{},{},shfl_in_bounds);", inst, shfl_op, value, index, width);
+ SetInBoundsFlag(ctx, inst);
+}
+} // Anonymous namespace
+
+void EmitLaneId(EmitContext& ctx, IR::Inst& inst) {
+ ctx.AddU32("{}=gl_SubGroupInvocationARB&31u;", inst);
+}
+
+void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
+ if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+ ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
+ } else {
+ const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
+ const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
+ ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask);
+ }
+}
+
+void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
+ if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+ ctx.AddU1("{}=anyInvocationARB({});", inst, pred);
+ } else {
+ const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
+ const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
+ ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask);
+ }
+}
+
+void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
+ if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+ ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
+ } else {
+ const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
+ const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
+ const auto value{fmt::format("({}^{})", ballot, active_mask)};
+ ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask);
+ }
+}
+
+void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
+ if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+ ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred);
+ } else {
+ ctx.AddU32("{}=uvec2(ballotARB({}))[gl_SubGroupInvocationARB];", inst, pred);
+ }
+}
+
+void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) {
+ ctx.AddU32("{}=uint(gl_SubGroupEqMaskARB.x);", inst);
+}
+
+void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) {
+ ctx.AddU32("{}=uint(gl_SubGroupLtMaskARB.x);", inst);
+}
+
+void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) {
+ ctx.AddU32("{}=uint(gl_SubGroupLeMaskARB.x);", inst);
+}
+
+void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) {
+ ctx.AddU32("{}=uint(gl_SubGroupGtMaskARB.x);", inst);
+}
+
+void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) {
+ ctx.AddU32("{}=uint(gl_SubGroupGeMaskARB.x);", inst);
+}
+
+void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value,
+ std::string_view index, std::string_view clamp,
+ std::string_view segmentation_mask) {
+ if (ctx.profile.support_gl_warp_intrinsics) {
+ UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, segmentation_mask);
+ return;
+ }
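+ // Fallback without NV shuffle intrinsics: derive the source lane from the index and
+ // segmentation mask, emulate the hardware bounds check, and read with readInvocationARB.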
+ const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)};
+ const auto thread_id{"gl_SubGroupInvocationARB"};
+ const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)};
+ const auto max_thread_id{ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask)};
+
+ const auto lhs{fmt::format("({}&{})", index, not_seg_mask)};
+ const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)};
+ ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
+ SetInBoundsFlag(ctx, inst);
+ ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
+}
+
+void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index,
+ std::string_view clamp, std::string_view segmentation_mask) {
+ if (ctx.profile.support_gl_warp_intrinsics) {
+ UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, segmentation_mask);
+ return;
+ }
+ const auto thread_id{"gl_SubGroupInvocationARB"};
+ const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)};
+ const auto src_thread_id{fmt::format("({}-{})", thread_id, index)};
+ ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id);
+ SetInBoundsFlag(ctx, inst);
+ ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
+}
+
+void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value,
+ std::string_view index, std::string_view clamp,
+ std::string_view segmentation_mask) {
+ if (ctx.profile.support_gl_warp_intrinsics) {
+ UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, segmentation_mask);
+ return;
+ }
+ const auto thread_id{"gl_SubGroupInvocationARB"};
+ const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)};
+ const auto src_thread_id{fmt::format("({}+{})", thread_id, index)};
+ ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
+ SetInBoundsFlag(ctx, inst);
+ ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
+}
+
+void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value,
+ std::string_view index, std::string_view clamp,
+ std::string_view segmentation_mask) {
+ if (ctx.profile.support_gl_warp_intrinsics) {
+ UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, segmentation_mask);
+ return;
+ }
+ const auto thread_id{"gl_SubGroupInvocationARB"};
+ const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)};
+ const auto src_thread_id{fmt::format("({}^{})", thread_id, index)};
+ ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
+ SetInBoundsFlag(ctx, inst);
+ ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
+}
+
+void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, std::string_view op_a, std::string_view op_b,
+ std::string_view swizzle) {
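+ // Each invocation extracts its own two-bit selector from the swizzle immediate (lane
+ // index modulo 4 picks the field) and indexes the FSWZ_A/FSWZ_B coefficient tables with it.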
+ const auto mask{fmt::format("({}>>((gl_SubGroupInvocationARB&3)<<1))&3", swizzle)};
+ const auto modifier_a{fmt::format("FSWZ_A[{}]", mask)};
+ const auto modifier_b{fmt::format("FSWZ_B[{}]", mask)};
+ ctx.AddF32("{}=({}*{})+({}*{});", inst, op_a, modifier_a, op_b, modifier_b);
+}
+
+void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) {
+ if (ctx.profile.support_gl_derivative_control) {
+ ctx.AddF32("{}=dFdxFine({});", inst, op_a);
+ } else {
+ LOG_WARNING(Shader_GLSL, "Device does not support dFdxFine, fallback to dFdx");
+ ctx.AddF32("{}=dFdx({});", inst, op_a);
+ }
+}
+
+void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) {
+ if (ctx.profile.support_gl_derivative_control) {
+ ctx.AddF32("{}=dFdyFine({});", inst, op_a);
+ } else {
+ LOG_WARNING(Shader_GLSL, "Device does not support dFdyFine, fallback to dFdy");
+ ctx.AddF32("{}=dFdy({});", inst, op_a);
+ }
+}
+
+void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) {
+ if (ctx.profile.support_gl_derivative_control) {
+ ctx.AddF32("{}=dFdxCoarse({});", inst, op_a);
+ } else {
+ LOG_WARNING(Shader_GLSL, "Device does not support dFdxCoarse, fallback to dFdx");
+ ctx.AddF32("{}=dFdx({});", inst, op_a);
+ }
+}
+
+void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) {
+ if (ctx.profile.support_gl_derivative_control) {
+ ctx.AddF32("{}=dFdyCoarse({});", inst, op_a);
+ } else {
+ LOG_WARNING(Shader_GLSL, "Device does not support dFdyCoarse, fallback to dFdy");
+ ctx.AddF32("{}=dFdy({});", inst, op_a);
+ }
+}
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/var_alloc.cpp b/src/shader_recompiler/backend/glsl/var_alloc.cpp
new file mode 100644
index 000000000..194f926ca
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/var_alloc.cpp
@@ -0,0 +1,308 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string>
+#include <string_view>
+
+#include <fmt/format.h>
+
+#include "shader_recompiler/backend/glsl/var_alloc.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLSL {
+namespace {
+std::string TypePrefix(GlslVarType type) {
+ switch (type) {
+ case GlslVarType::U1:
+ return "b_";
+ case GlslVarType::F16x2:
+ return "f16x2_";
+ case GlslVarType::U32:
+ return "u_";
+ case GlslVarType::F32:
+ return "f_";
+ case GlslVarType::U64:
+ return "u64_";
+ case GlslVarType::F64:
+ return "d_";
+ case GlslVarType::U32x2:
+ return "u2_";
+ case GlslVarType::F32x2:
+ return "f2_";
+ case GlslVarType::U32x3:
+ return "u3_";
+ case GlslVarType::F32x3:
+ return "f3_";
+ case GlslVarType::U32x4:
+ return "u4_";
+ case GlslVarType::F32x4:
+ return "f4_";
+ case GlslVarType::PrecF32:
+ return "pf_";
+ case GlslVarType::PrecF64:
+ return "pd_";
+ case GlslVarType::Void:
+ return "";
+ default:
+ throw NotImplementedException("Type {}", type);
+ }
+}
+
+std::string FormatFloat(std::string_view value, IR::Type type) {
+ // TODO: Confirm FP64 nan/inf
+ if (type == IR::Type::F32) {
+ if (value == "nan") {
+ return "utof(0x7fc00000)";
+ }
+ if (value == "inf") {
+ return "utof(0x7f800000)";
+ }
+ if (value == "-inf") {
+ return "utof(0xff800000)";
+ }
+ }
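+ // NaN and the infinities have no portable GLSL literal, so they are rebuilt from their
+ // IEEE-754 bit patterns above; every other value becomes a plain literal below.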
+ if (value.find_first_of('e') != std::string_view::npos) {
+ // scientific notation
+ const auto cast{type == IR::Type::F32 ? "float" : "double"};
+ return fmt::format("{}({})", cast, value);
+ }
+ const bool needs_dot{value.find_first_of('.') == std::string_view::npos};
+ const bool needs_suffix{!value.ends_with('f')};
+ const auto suffix{type == IR::Type::F32 ? "f" : "lf"};
+ return fmt::format("{}{}{}", value, needs_dot ? "." : "", needs_suffix ? suffix : "");
+}
+
+std::string MakeImm(const IR::Value& value) {
+ switch (value.Type()) {
+ case IR::Type::U1:
+ return fmt::format("{}", value.U1() ? "true" : "false");
+ case IR::Type::U32:
+ return fmt::format("{}u", value.U32());
+ case IR::Type::F32:
+ return FormatFloat(fmt::format("{}", value.F32()), IR::Type::F32);
+ case IR::Type::U64:
+ return fmt::format("{}ul", value.U64());
+ case IR::Type::F64:
+ return FormatFloat(fmt::format("{}", value.F64()), IR::Type::F64);
+ case IR::Type::Void:
+ return "";
+ default:
+ throw NotImplementedException("Immediate type {}", value.Type());
+ }
+}
+} // Anonymous namespace
+
+std::string VarAlloc::Representation(u32 index, GlslVarType type) const {
+ const auto prefix{TypePrefix(type)};
+ return fmt::format("{}{}", prefix, index);
+}
+
+std::string VarAlloc::Representation(Id id) const {
+ return Representation(id.index, id.type);
+}
+
+std::string VarAlloc::Define(IR::Inst& inst, GlslVarType type) {
+ if (inst.HasUses()) {
+ inst.SetDefinition<Id>(Alloc(type));
+ return Representation(inst.Definition<Id>());
+ } else {
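+ // The result is never read, so emit into the per-type scratch temporary ('t' prefix)
+ // instead of allocating a tracked variable.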
+ Id id{};
+ id.type.Assign(type);
+ GetUseTracker(type).uses_temp = true;
+ inst.SetDefinition<Id>(id);
+ return 't' + Representation(inst.Definition<Id>());
+ }
+}
+
+std::string VarAlloc::Define(IR::Inst& inst, IR::Type type) {
+ return Define(inst, RegType(type));
+}
+
+std::string VarAlloc::PhiDefine(IR::Inst& inst, IR::Type type) {
+ return AddDefine(inst, RegType(type));
+}
+
+std::string VarAlloc::AddDefine(IR::Inst& inst, GlslVarType type) {
+ if (!inst.HasUses()) {
+ return "";
+ }
+ inst.SetDefinition<Id>(Alloc(type));
+ return Representation(inst.Definition<Id>());
+}
+
+std::string VarAlloc::Consume(const IR::Value& value) {
+ return value.IsImmediate() ? MakeImm(value) : ConsumeInst(*value.InstRecursive());
+}
+
+std::string VarAlloc::ConsumeInst(IR::Inst& inst) {
+ inst.DestructiveRemoveUsage();
+ if (!inst.HasUses()) {
+ Free(inst.Definition<Id>());
+ }
+ return Representation(inst.Definition<Id>());
+}
+
+std::string VarAlloc::GetGlslType(IR::Type type) const {
+ return GetGlslType(RegType(type));
+}
+
+Id VarAlloc::Alloc(GlslVarType type) {
+ auto& use_tracker{GetUseTracker(type)};
+ const auto num_vars{use_tracker.var_use.size()};
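+ // Reuse a previously freed slot before growing the pool with a new variable.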
+ for (size_t var = 0; var < num_vars; ++var) {
+ if (use_tracker.var_use[var]) {
+ continue;
+ }
+ use_tracker.num_used = std::max(use_tracker.num_used, var + 1);
+ use_tracker.var_use[var] = true;
+ Id ret{};
+ ret.is_valid.Assign(1);
+ ret.type.Assign(type);
+ ret.index.Assign(static_cast<u32>(var));
+ return ret;
+ }
+ // Allocate a new variable
+ use_tracker.var_use.push_back(true);
+ Id ret{};
+ ret.is_valid.Assign(1);
+ ret.type.Assign(type);
+ ret.index.Assign(static_cast<u32>(use_tracker.num_used));
+ ++use_tracker.num_used;
+ return ret;
+}
+
+void VarAlloc::Free(Id id) {
+ if (id.is_valid == 0) {
+ throw LogicError("Freeing invalid variable");
+ }
+ auto& use_tracker{GetUseTracker(id.type)};
+ use_tracker.var_use[id.index] = false;
+}
+
+GlslVarType VarAlloc::RegType(IR::Type type) const {
+ switch (type) {
+ case IR::Type::U1:
+ return GlslVarType::U1;
+ case IR::Type::U32:
+ return GlslVarType::U32;
+ case IR::Type::F32:
+ return GlslVarType::F32;
+ case IR::Type::U64:
+ return GlslVarType::U64;
+ case IR::Type::F64:
+ return GlslVarType::F64;
+ default:
+ throw NotImplementedException("IR type {}", type);
+ }
+}
+
+std::string VarAlloc::GetGlslType(GlslVarType type) const {
+ switch (type) {
+ case GlslVarType::U1:
+ return "bool";
+ case GlslVarType::F16x2:
+ return "f16vec2";
+ case GlslVarType::U32:
+ return "uint";
+ case GlslVarType::F32:
+ case GlslVarType::PrecF32:
+ return "float";
+ case GlslVarType::U64:
+ return "uint64_t";
+ case GlslVarType::F64:
+ case GlslVarType::PrecF64:
+ return "double";
+ case GlslVarType::U32x2:
+ return "uvec2";
+ case GlslVarType::F32x2:
+ return "vec2";
+ case GlslVarType::U32x3:
+ return "uvec3";
+ case GlslVarType::F32x3:
+ return "vec3";
+ case GlslVarType::U32x4:
+ return "uvec4";
+ case GlslVarType::F32x4:
+ return "vec4";
+ case GlslVarType::Void:
+ return "";
+ default:
+ throw NotImplementedException("Type {}", type);
+ }
+}
+
+VarAlloc::UseTracker& VarAlloc::GetUseTracker(GlslVarType type) {
+ switch (type) {
+ case GlslVarType::U1:
+ return var_bool;
+ case GlslVarType::F16x2:
+ return var_f16x2;
+ case GlslVarType::U32:
+ return var_u32;
+ case GlslVarType::F32:
+ return var_f32;
+ case GlslVarType::U64:
+ return var_u64;
+ case GlslVarType::F64:
+ return var_f64;
+ case GlslVarType::U32x2:
+ return var_u32x2;
+ case GlslVarType::F32x2:
+ return var_f32x2;
+ case GlslVarType::U32x3:
+ return var_u32x3;
+ case GlslVarType::F32x3:
+ return var_f32x3;
+ case GlslVarType::U32x4:
+ return var_u32x4;
+ case GlslVarType::F32x4:
+ return var_f32x4;
+ case GlslVarType::PrecF32:
+ return var_precf32;
+ case GlslVarType::PrecF64:
+ return var_precf64;
+ default:
+ throw NotImplementedException("Type {}", type);
+ }
+}
+
+const VarAlloc::UseTracker& VarAlloc::GetUseTracker(GlslVarType type) const {
+ switch (type) {
+ case GlslVarType::U1:
+ return var_bool;
+ case GlslVarType::F16x2:
+ return var_f16x2;
+ case GlslVarType::U32:
+ return var_u32;
+ case GlslVarType::F32:
+ return var_f32;
+ case GlslVarType::U64:
+ return var_u64;
+ case GlslVarType::F64:
+ return var_f64;
+ case GlslVarType::U32x2:
+ return var_u32x2;
+ case GlslVarType::F32x2:
+ return var_f32x2;
+ case GlslVarType::U32x3:
+ return var_u32x3;
+ case GlslVarType::F32x3:
+ return var_f32x3;
+ case GlslVarType::U32x4:
+ return var_u32x4;
+ case GlslVarType::F32x4:
+ return var_f32x4;
+ case GlslVarType::PrecF32:
+ return var_precf32;
+ case GlslVarType::PrecF64:
+ return var_precf64;
+ default:
+ throw NotImplementedException("Type {}", type);
+ }
+}
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/var_alloc.h b/src/shader_recompiler/backend/glsl/var_alloc.h
new file mode 100644
index 000000000..8b49f32a6
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/var_alloc.h
@@ -0,0 +1,105 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <bitset>
+#include <string>
+#include <vector>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+
+namespace Shader::IR {
+class Inst;
+class Value;
+enum class Type;
+} // namespace Shader::IR
+
+namespace Shader::Backend::GLSL {
+enum class GlslVarType : u32 {
+ U1,
+ F16x2,
+ U32,
+ F32,
+ U64,
+ F64,
+ U32x2,
+ F32x2,
+ U32x3,
+ F32x3,
+ U32x4,
+ F32x4,
+ PrecF32,
+ PrecF64,
+ Void,
+};
+
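+// Packed variable handle: one validity bit, the GLSL type, and the per-type index in a
+// single 32-bit word.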
+struct Id {
+ union {
+ u32 raw;
+ BitField<0, 1, u32> is_valid;
+ BitField<1, 4, GlslVarType> type;
+ BitField<6, 26, u32> index;
+ };
+
+ bool operator==(Id rhs) const noexcept {
+ return raw == rhs.raw;
+ }
+ bool operator!=(Id rhs) const noexcept {
+ return !operator==(rhs);
+ }
+};
+static_assert(sizeof(Id) == sizeof(u32));
+
+class VarAlloc {
+public:
+ struct UseTracker {
+ bool uses_temp{};
+ size_t num_used{};
+ std::vector<bool> var_use;
+ };
+
+ /// Used for explicit variable definitions; falls back to a scratch temporary
+ /// when the instruction's result has no uses.
+ std::string Define(IR::Inst& inst, GlslVarType type);
+ std::string Define(IR::Inst& inst, IR::Type type);
+
+ /// Used to define variables assigned by IR instructions. Returns an empty string
+ /// when the instruction's result is never used in the IR.
+ std::string AddDefine(IR::Inst& inst, GlslVarType type);
+ std::string PhiDefine(IR::Inst& inst, IR::Type type);
+
+ std::string Consume(const IR::Value& value);
+ std::string ConsumeInst(IR::Inst& inst);
+
+ std::string GetGlslType(GlslVarType type) const;
+ std::string GetGlslType(IR::Type type) const;
+
+ const UseTracker& GetUseTracker(GlslVarType type) const;
+ std::string Representation(u32 index, GlslVarType type) const;
+
+private:
+ GlslVarType RegType(IR::Type type) const;
+ Id Alloc(GlslVarType type);
+ void Free(Id id);
+ UseTracker& GetUseTracker(GlslVarType type);
+ std::string Representation(Id id) const;
+
+ UseTracker var_bool{};
+ UseTracker var_f16x2{};
+ UseTracker var_u32{};
+ UseTracker var_u32x2{};
+ UseTracker var_u32x3{};
+ UseTracker var_u32x4{};
+ UseTracker var_f32{};
+ UseTracker var_f32x2{};
+ UseTracker var_f32x3{};
+ UseTracker var_f32x4{};
+ UseTracker var_u64{};
+ UseTracker var_f64{};
+ UseTracker var_precf32{};
+ UseTracker var_precf64{};
+};
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
new file mode 100644
index 000000000..2d29d8c14
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -0,0 +1,1368 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <climits>
+#include <string_view>
+
+#include <fmt/format.h>
+
+#include "common/common_types.h"
+#include "common/div_ceil.h"
+#include "shader_recompiler/backend/spirv/emit_context.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
+enum class Operation {
+ Increment,
+ Decrement,
+ FPAdd,
+ FPMin,
+ FPMax,
+};
+
+struct AttrInfo {
+ Id pointer;
+ Id id;
+ bool needs_cast;
+};
+
+Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) {
+ const spv::ImageFormat format{spv::ImageFormat::Unknown};
+ const Id type{ctx.F32[1]};
+ const bool depth{desc.is_depth};
+ switch (desc.type) {
+ case TextureType::Color1D:
+ return ctx.TypeImage(type, spv::Dim::Dim1D, depth, false, false, 1, format);
+ case TextureType::ColorArray1D:
+ return ctx.TypeImage(type, spv::Dim::Dim1D, depth, true, false, 1, format);
+ case TextureType::Color2D:
+ return ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, false, 1, format);
+ case TextureType::ColorArray2D:
+ return ctx.TypeImage(type, spv::Dim::Dim2D, depth, true, false, 1, format);
+ case TextureType::Color3D:
+ return ctx.TypeImage(type, spv::Dim::Dim3D, depth, false, false, 1, format);
+ case TextureType::ColorCube:
+ return ctx.TypeImage(type, spv::Dim::Cube, depth, false, false, 1, format);
+ case TextureType::ColorArrayCube:
+ return ctx.TypeImage(type, spv::Dim::Cube, depth, true, false, 1, format);
+ case TextureType::Buffer:
+ break;
+ }
+ throw InvalidArgument("Invalid texture type {}", desc.type);
+}
+
+spv::ImageFormat GetImageFormat(ImageFormat format) {
+ switch (format) {
+ case ImageFormat::Typeless:
+ return spv::ImageFormat::Unknown;
+ case ImageFormat::R8_UINT:
+ return spv::ImageFormat::R8ui;
+ case ImageFormat::R8_SINT:
+ return spv::ImageFormat::R8i;
+ case ImageFormat::R16_UINT:
+ return spv::ImageFormat::R16ui;
+ case ImageFormat::R16_SINT:
+ return spv::ImageFormat::R16i;
+ case ImageFormat::R32_UINT:
+ return spv::ImageFormat::R32ui;
+ case ImageFormat::R32G32_UINT:
+ return spv::ImageFormat::Rg32ui;
+ case ImageFormat::R32G32B32A32_UINT:
+ return spv::ImageFormat::Rgba32ui;
+ }
+ throw InvalidArgument("Invalid image format {}", format);
+}
+
+Id ImageType(EmitContext& ctx, const ImageDescriptor& desc) {
+ const spv::ImageFormat format{GetImageFormat(desc.format)};
+ const Id type{ctx.U32[1]};
+ switch (desc.type) {
+ case TextureType::Color1D:
+ return ctx.TypeImage(type, spv::Dim::Dim1D, false, false, false, 2, format);
+ case TextureType::ColorArray1D:
+ return ctx.TypeImage(type, spv::Dim::Dim1D, false, true, false, 2, format);
+ case TextureType::Color2D:
+ return ctx.TypeImage(type, spv::Dim::Dim2D, false, false, false, 2, format);
+ case TextureType::ColorArray2D:
+ return ctx.TypeImage(type, spv::Dim::Dim2D, false, true, false, 2, format);
+ case TextureType::Color3D:
+ return ctx.TypeImage(type, spv::Dim::Dim3D, false, false, false, 2, format);
+ case TextureType::Buffer:
+ throw NotImplementedException("Image buffer");
+ default:
+ break;
+ }
+ throw InvalidArgument("Invalid texture type {}", desc.type);
+}
+
+Id DefineVariable(EmitContext& ctx, Id type, std::optional<spv::BuiltIn> builtin,
+ spv::StorageClass storage_class) {
+ const Id pointer_type{ctx.TypePointer(storage_class, type)};
+ const Id id{ctx.AddGlobalVariable(pointer_type, storage_class)};
+ if (builtin) {
+ ctx.Decorate(id, spv::Decoration::BuiltIn, *builtin);
+ }
+ ctx.interfaces.push_back(id);
+ return id;
+}
+
+u32 NumVertices(InputTopology input_topology) {
+ switch (input_topology) {
+ case InputTopology::Points:
+ return 1;
+ case InputTopology::Lines:
+ return 2;
+ case InputTopology::LinesAdjacency:
+ return 4;
+ case InputTopology::Triangles:
+ return 3;
+ case InputTopology::TrianglesAdjacency:
+ return 6;
+ }
+ throw InvalidArgument("Invalid input topology {}", input_topology);
+}
+
+Id DefineInput(EmitContext& ctx, Id type, bool per_invocation,
+ std::optional<spv::BuiltIn> builtin = std::nullopt) {
+ switch (ctx.stage) {
+ case Stage::TessellationControl:
+ case Stage::TessellationEval:
+ if (per_invocation) {
+ type = ctx.TypeArray(type, ctx.Const(32u));
+ }
+ break;
+ case Stage::Geometry:
+ if (per_invocation) {
+ const u32 num_vertices{NumVertices(ctx.runtime_info.input_topology)};
+ type = ctx.TypeArray(type, ctx.Const(num_vertices));
+ }
+ break;
+ default:
+ break;
+ }
+ return DefineVariable(ctx, type, builtin, spv::StorageClass::Input);
+}
+
+Id DefineOutput(EmitContext& ctx, Id type, std::optional<u32> invocations,
+ std::optional<spv::BuiltIn> builtin = std::nullopt) {
+ if (invocations && ctx.stage == Stage::TessellationControl) {
+ type = ctx.TypeArray(type, ctx.Const(*invocations));
+ }
+ return DefineVariable(ctx, type, builtin, spv::StorageClass::Output);
+}
+
+void DefineGenericOutput(EmitContext& ctx, size_t index, std::optional<u32> invocations) {
+ static constexpr std::string_view swizzle{"xyzw"};
+ const size_t base_attr_index{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4};
+ u32 element{0};
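+ // A generic attribute may be split into several output variables so that each slice can
+ // carry its own transform feedback decorations.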
+ while (element < 4) {
+ const u32 remainder{4 - element};
+ const TransformFeedbackVarying* xfb_varying{};
+ if (!ctx.runtime_info.xfb_varyings.empty()) {
+ xfb_varying = &ctx.runtime_info.xfb_varyings[base_attr_index + element];
+ xfb_varying = xfb_varying && xfb_varying->components > 0 ? xfb_varying : nullptr;
+ }
+ const u32 num_components{xfb_varying ? xfb_varying->components : remainder};
+
+ const Id id{DefineOutput(ctx, ctx.F32[num_components], invocations)};
+ ctx.Decorate(id, spv::Decoration::Location, static_cast<u32>(index));
+ if (element > 0) {
+ ctx.Decorate(id, spv::Decoration::Component, element);
+ }
+ if (xfb_varying) {
+ ctx.Decorate(id, spv::Decoration::XfbBuffer, xfb_varying->buffer);
+ ctx.Decorate(id, spv::Decoration::XfbStride, xfb_varying->stride);
+ ctx.Decorate(id, spv::Decoration::Offset, xfb_varying->offset);
+ }
+ if (num_components < 4 || element > 0) {
+ const std::string_view subswizzle{swizzle.substr(element, num_components)};
+ ctx.Name(id, fmt::format("out_attr{}_{}", index, subswizzle));
+ } else {
+ ctx.Name(id, fmt::format("out_attr{}", index));
+ }
+ const GenericElementInfo info{
+ .id = id,
+ .first_element = element,
+ .num_components = num_components,
+ };
+ std::fill_n(ctx.output_generics[index].begin() + element, num_components, info);
+ element += num_components;
+ }
+}
+
+Id GetAttributeType(EmitContext& ctx, AttributeType type) {
+ switch (type) {
+ case AttributeType::Float:
+ return ctx.F32[4];
+ case AttributeType::SignedInt:
+ return ctx.TypeVector(ctx.TypeInt(32, true), 4);
+ case AttributeType::UnsignedInt:
+ return ctx.U32[4];
+ case AttributeType::Disabled:
+ break;
+ }
+ throw InvalidArgument("Invalid attribute type {}", type);
+}
+
+std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) {
+ const AttributeType type{ctx.runtime_info.generic_input_types.at(index)};
+ switch (type) {
+ case AttributeType::Float:
+ return AttrInfo{ctx.input_f32, ctx.F32[1], false};
+ case AttributeType::UnsignedInt:
+ return AttrInfo{ctx.input_u32, ctx.U32[1], true};
+ case AttributeType::SignedInt:
+ return AttrInfo{ctx.input_s32, ctx.TypeInt(32, true), true};
+ case AttributeType::Disabled:
+ return std::nullopt;
+ }
+ throw InvalidArgument("Invalid attribute type {}", type);
+}
+
+std::string_view StageName(Stage stage) {
+ switch (stage) {
+ case Stage::VertexA:
+ return "vs_a";
+ case Stage::VertexB:
+ return "vs";
+ case Stage::TessellationControl:
+ return "tcs";
+ case Stage::TessellationEval:
+ return "tes";
+ case Stage::Geometry:
+ return "gs";
+ case Stage::Fragment:
+ return "fs";
+ case Stage::Compute:
+ return "cs";
+ }
+ throw InvalidArgument("Invalid stage {}", stage);
+}
+
+template <typename... Args>
+void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... args) {
+ ctx.Name(object, fmt::format(fmt::runtime(format_str), StageName(ctx.stage),
+ std::forward<Args>(args)...)
+ .c_str());
+}
+
+void DefineConstBuffers(EmitContext& ctx, const Info& info, Id UniformDefinitions::*member_type,
+ u32 binding, Id type, char type_char, u32 element_size) {
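+ // Every constant buffer is declared as a full 64 KiB array of the element type wrapped in
+ // a Block-decorated struct, shared by all bindings accessed with that element width.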
+ const Id array_type{ctx.TypeArray(type, ctx.Const(65536U / element_size))};
+ ctx.Decorate(array_type, spv::Decoration::ArrayStride, element_size);
+
+ const Id struct_type{ctx.TypeStruct(array_type)};
+ Name(ctx, struct_type, "{}_cbuf_block_{}{}", ctx.stage, type_char, element_size * CHAR_BIT);
+ ctx.Decorate(struct_type, spv::Decoration::Block);
+ ctx.MemberName(struct_type, 0, "data");
+ ctx.MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
+
+ const Id struct_pointer_type{ctx.TypePointer(spv::StorageClass::Uniform, struct_type)};
+ const Id uniform_type{ctx.TypePointer(spv::StorageClass::Uniform, type)};
+ ctx.uniform_types.*member_type = uniform_type;
+
+ for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) {
+ const Id id{ctx.AddGlobalVariable(struct_pointer_type, spv::StorageClass::Uniform)};
+ ctx.Decorate(id, spv::Decoration::Binding, binding);
+ ctx.Decorate(id, spv::Decoration::DescriptorSet, 0U);
+ ctx.Name(id, fmt::format("c{}", desc.index));
+ for (size_t i = 0; i < desc.count; ++i) {
+ ctx.cbufs[desc.index + i].*member_type = id;
+ }
+ if (ctx.profile.supported_spirv >= 0x00010400) {
+ ctx.interfaces.push_back(id);
+ }
+ binding += desc.count;
+ }
+}
+
+void DefineSsbos(EmitContext& ctx, StorageTypeDefinition& type_def,
+ Id StorageDefinitions::*member_type, const Info& info, u32 binding, Id type,
+ u32 stride) {
+ const Id array_type{ctx.TypeRuntimeArray(type)};
+ ctx.Decorate(array_type, spv::Decoration::ArrayStride, stride);
+
+ const Id struct_type{ctx.TypeStruct(array_type)};
+ ctx.Decorate(struct_type, spv::Decoration::Block);
+ ctx.MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
+
+ const Id struct_pointer{ctx.TypePointer(spv::StorageClass::StorageBuffer, struct_type)};
+ type_def.array = struct_pointer;
+ type_def.element = ctx.TypePointer(spv::StorageClass::StorageBuffer, type);
+
+ u32 index{};
+ for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) {
+ const Id id{ctx.AddGlobalVariable(struct_pointer, spv::StorageClass::StorageBuffer)};
+ ctx.Decorate(id, spv::Decoration::Binding, binding);
+ ctx.Decorate(id, spv::Decoration::DescriptorSet, 0U);
+ ctx.Name(id, fmt::format("ssbo{}", index));
+ if (ctx.profile.supported_spirv >= 0x00010400) {
+ ctx.interfaces.push_back(id);
+ }
+ for (size_t i = 0; i < desc.count; ++i) {
+ ctx.ssbos[index + i].*member_type = id;
+ }
+ index += desc.count;
+ binding += desc.count;
+ }
+}
+
+Id CasFunction(EmitContext& ctx, Operation operation, Id value_type) {
+ const Id func_type{ctx.TypeFunction(value_type, value_type, value_type)};
+ const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)};
+ const Id op_a{ctx.OpFunctionParameter(value_type)};
+ const Id op_b{ctx.OpFunctionParameter(value_type)};
+ ctx.AddLabel();
+ Id result{};
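+ // Increment wraps to zero once the current value reaches the operand; decrement wraps
+ // back to the operand when the value is zero or already greater than it.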
+ switch (operation) {
+ case Operation::Increment: {
+ const Id pred{ctx.OpUGreaterThanEqual(ctx.U1, op_a, op_b)};
+ const Id incr{ctx.OpIAdd(value_type, op_a, ctx.Constant(value_type, 1))};
+ result = ctx.OpSelect(value_type, pred, ctx.u32_zero_value, incr);
+ break;
+ }
+ case Operation::Decrement: {
+ const Id lhs{ctx.OpIEqual(ctx.U1, op_a, ctx.Constant(value_type, 0u))};
+ const Id rhs{ctx.OpUGreaterThan(ctx.U1, op_a, op_b)};
+ const Id pred{ctx.OpLogicalOr(ctx.U1, lhs, rhs)};
+ const Id decr{ctx.OpISub(value_type, op_a, ctx.Constant(value_type, 1))};
+ result = ctx.OpSelect(value_type, pred, op_b, decr);
+ break;
+ }
+ case Operation::FPAdd:
+ result = ctx.OpFAdd(value_type, op_a, op_b);
+ break;
+ case Operation::FPMin:
+ result = ctx.OpFMin(value_type, op_a, op_b);
+ break;
+ case Operation::FPMax:
+ result = ctx.OpFMax(value_type, op_a, op_b);
+ break;
+ default:
+ break;
+ }
+ ctx.OpReturnValue(result);
+ ctx.OpFunctionEnd();
+ return func;
+}
+
+Id CasLoop(EmitContext& ctx, Operation operation, Id array_pointer, Id element_pointer,
+ Id value_type, Id memory_type, spv::Scope scope) {
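+ // Builds a helper that emulates the atomic with a compare-and-exchange retry loop:
+ // load the word, apply the operation, and retry until OpAtomicCompareExchange succeeds.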
+ const bool is_shared{scope == spv::Scope::Workgroup};
+ const bool is_struct{!is_shared || ctx.profile.support_explicit_workgroup_layout};
+ const Id cas_func{CasFunction(ctx, operation, value_type)};
+ const Id zero{ctx.u32_zero_value};
+ const Id scope_id{ctx.Const(static_cast<u32>(scope))};
+
+ const Id loop_header{ctx.OpLabel()};
+ const Id continue_block{ctx.OpLabel()};
+ const Id merge_block{ctx.OpLabel()};
+ const Id func_type{is_shared
+ ? ctx.TypeFunction(value_type, ctx.U32[1], value_type)
+ : ctx.TypeFunction(value_type, ctx.U32[1], value_type, array_pointer)};
+
+ const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)};
+ const Id index{ctx.OpFunctionParameter(ctx.U32[1])};
+ const Id op_b{ctx.OpFunctionParameter(value_type)};
+ const Id base{is_shared ? ctx.shared_memory_u32 : ctx.OpFunctionParameter(array_pointer)};
+ ctx.AddLabel();
+ ctx.OpBranch(loop_header);
+ ctx.AddLabel(loop_header);
+
+ ctx.OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone);
+ ctx.OpBranch(continue_block);
+
+ ctx.AddLabel(continue_block);
+ const Id word_pointer{is_struct ? ctx.OpAccessChain(element_pointer, base, zero, index)
+ : ctx.OpAccessChain(element_pointer, base, index)};
+ if (value_type.value == ctx.F32[2].value) {
+ const Id u32_value{ctx.OpLoad(ctx.U32[1], word_pointer)};
+ const Id value{ctx.OpUnpackHalf2x16(ctx.F32[2], u32_value)};
+ const Id new_value{ctx.OpFunctionCall(value_type, cas_func, value, op_b)};
+ const Id u32_new_value{ctx.OpPackHalf2x16(ctx.U32[1], new_value)};
+ const Id atomic_res{ctx.OpAtomicCompareExchange(ctx.U32[1], word_pointer, scope_id, zero,
+ zero, u32_new_value, u32_value)};
+ const Id success{ctx.OpIEqual(ctx.U1, atomic_res, u32_value)};
+ ctx.OpBranchConditional(success, merge_block, loop_header);
+
+ ctx.AddLabel(merge_block);
+ ctx.OpReturnValue(ctx.OpUnpackHalf2x16(ctx.F32[2], atomic_res));
+ } else {
+ const Id value{ctx.OpLoad(memory_type, word_pointer)};
+ const bool matching_type{value_type.value == memory_type.value};
+ const Id bitcast_value{matching_type ? value : ctx.OpBitcast(value_type, value)};
+ const Id cal_res{ctx.OpFunctionCall(value_type, cas_func, bitcast_value, op_b)};
+ const Id new_value{matching_type ? cal_res : ctx.OpBitcast(memory_type, cal_res)};
+ const Id atomic_res{ctx.OpAtomicCompareExchange(ctx.U32[1], word_pointer, scope_id, zero,
+ zero, new_value, value)};
+ const Id success{ctx.OpIEqual(ctx.U1, atomic_res, value)};
+ ctx.OpBranchConditional(success, merge_block, loop_header);
+
+ ctx.AddLabel(merge_block);
+ ctx.OpReturnValue(ctx.OpBitcast(value_type, atomic_res));
+ }
+ ctx.OpFunctionEnd();
+ return func;
+}
+
+template <typename Desc>
+std::string NameOf(Stage stage, const Desc& desc, std::string_view prefix) {
+ if (desc.count > 1) {
+ return fmt::format("{}_{}{}_{:02x}x{}", StageName(stage), prefix, desc.cbuf_index,
+ desc.cbuf_offset, desc.count);
+ } else {
+ return fmt::format("{}_{}{}_{:02x}", StageName(stage), prefix, desc.cbuf_index,
+ desc.cbuf_offset);
+ }
+}
+
+Id DescType(EmitContext& ctx, Id sampled_type, Id pointer_type, u32 count) {
+ if (count > 1) {
+ const Id array_type{ctx.TypeArray(sampled_type, ctx.Const(count))};
+ return ctx.TypePointer(spv::StorageClass::UniformConstant, array_type);
+ } else {
+ return pointer_type;
+ }
+}
+} // Anonymous namespace
+
+void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) {
+ defs[0] = sirit_ctx.Name(base_type, name);
+
+ std::array<char, 6> def_name;
+ for (int i = 1; i < 4; ++i) {
+ const std::string_view def_name_view(
+ def_name.data(),
+ fmt::format_to_n(def_name.data(), def_name.size(), "{}x{}", name, i + 1).size);
+ defs[static_cast<size_t>(i)] =
+ sirit_ctx.Name(sirit_ctx.TypeVector(base_type, i + 1), def_name_view);
+ }
+}
+
+EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_,
+ IR::Program& program, Bindings& bindings)
+ : Sirit::Module(profile_.supported_spirv), profile{profile_},
+ runtime_info{runtime_info_}, stage{program.stage} {
+ const bool is_unified{profile.unified_descriptor_binding};
+ u32& uniform_binding{is_unified ? bindings.unified : bindings.uniform_buffer};
+ u32& storage_binding{is_unified ? bindings.unified : bindings.storage_buffer};
+ u32& texture_binding{is_unified ? bindings.unified : bindings.texture};
+ u32& image_binding{is_unified ? bindings.unified : bindings.image};
+ AddCapability(spv::Capability::Shader);
+ DefineCommonTypes(program.info);
+ DefineCommonConstants();
+ DefineInterfaces(program);
+ DefineLocalMemory(program);
+ DefineSharedMemory(program);
+ DefineSharedMemoryFunctions(program);
+ DefineConstantBuffers(program.info, uniform_binding);
+ DefineStorageBuffers(program.info, storage_binding);
+ DefineTextureBuffers(program.info, texture_binding);
+ DefineImageBuffers(program.info, image_binding);
+ DefineTextures(program.info, texture_binding);
+ DefineImages(program.info, image_binding);
+ DefineAttributeMemAccess(program.info);
+ DefineGlobalMemoryFunctions(program.info);
+}
+
+EmitContext::~EmitContext() = default;
+
+Id EmitContext::Def(const IR::Value& value) {
+ if (!value.IsImmediate()) {
+ return value.InstRecursive()->Definition<Id>();
+ }
+ switch (value.Type()) {
+ case IR::Type::Void:
+ // Void instructions are used for optional arguments (e.g. texture offsets)
+ // They are not meant to be used in the SPIR-V module
+ return Id{};
+ case IR::Type::U1:
+ return value.U1() ? true_value : false_value;
+ case IR::Type::U32:
+ return Const(value.U32());
+ case IR::Type::U64:
+ return Constant(U64, value.U64());
+ case IR::Type::F32:
+ return Const(value.F32());
+ case IR::Type::F64:
+ return Constant(F64[1], value.F64());
+ default:
+ throw NotImplementedException("Immediate type {}", value.Type());
+ }
+}
+
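+// Bit offset of the addressed byte (BitOffset8) or halfword (BitOffset16) within its
+// containing 32-bit word.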
+Id EmitContext::BitOffset8(const IR::Value& offset) {
+ if (offset.IsImmediate()) {
+ return Const((offset.U32() % 4) * 8);
+ }
+ return OpBitwiseAnd(U32[1], OpShiftLeftLogical(U32[1], Def(offset), Const(3u)), Const(24u));
+}
+
+Id EmitContext::BitOffset16(const IR::Value& offset) {
+ if (offset.IsImmediate()) {
+ return Const(((offset.U32() / 2) % 2) * 16);
+ }
+ return OpBitwiseAnd(U32[1], OpShiftLeftLogical(U32[1], Def(offset), Const(3u)), Const(16u));
+}
+
+void EmitContext::DefineCommonTypes(const Info& info) {
+ void_id = TypeVoid();
+
+ U1 = Name(TypeBool(), "u1");
+
+ F32.Define(*this, TypeFloat(32), "f32");
+ U32.Define(*this, TypeInt(32, false), "u32");
+ S32.Define(*this, TypeInt(32, true), "s32");
+
+ private_u32 = Name(TypePointer(spv::StorageClass::Private, U32[1]), "private_u32");
+
+ input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32");
+ input_u32 = Name(TypePointer(spv::StorageClass::Input, U32[1]), "input_u32");
+ input_s32 = Name(TypePointer(spv::StorageClass::Input, TypeInt(32, true)), "input_s32");
+
+ output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32");
+ output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32");
+
+ if (info.uses_int8 && profile.support_int8) {
+ AddCapability(spv::Capability::Int8);
+ U8 = Name(TypeInt(8, false), "u8");
+ S8 = Name(TypeInt(8, true), "s8");
+ }
+ if (info.uses_int16 && profile.support_int16) {
+ AddCapability(spv::Capability::Int16);
+ U16 = Name(TypeInt(16, false), "u16");
+ S16 = Name(TypeInt(16, true), "s16");
+ }
+ if (info.uses_int64) {
+ AddCapability(spv::Capability::Int64);
+ U64 = Name(TypeInt(64, false), "u64");
+ }
+ if (info.uses_fp16) {
+ AddCapability(spv::Capability::Float16);
+ F16.Define(*this, TypeFloat(16), "f16");
+ }
+ if (info.uses_fp64) {
+ AddCapability(spv::Capability::Float64);
+ F64.Define(*this, TypeFloat(64), "f64");
+ }
+}
+
+void EmitContext::DefineCommonConstants() {
+ true_value = ConstantTrue(U1);
+ false_value = ConstantFalse(U1);
+ u32_zero_value = Const(0U);
+ f32_zero_value = Const(0.0f);
+}
+
+void EmitContext::DefineInterfaces(const IR::Program& program) {
+ DefineInputs(program);
+ DefineOutputs(program);
+}
+
+void EmitContext::DefineLocalMemory(const IR::Program& program) {
+ if (program.local_memory_size == 0) {
+ return;
+ }
+ const u32 num_elements{Common::DivCeil(program.local_memory_size, 4U)};
+ const Id type{TypeArray(U32[1], Const(num_elements))};
+ const Id pointer{TypePointer(spv::StorageClass::Private, type)};
+ local_memory = AddGlobalVariable(pointer, spv::StorageClass::Private);
+ if (profile.supported_spirv >= 0x00010400) {
+ interfaces.push_back(local_memory);
+ }
+}
+
+void EmitContext::DefineSharedMemory(const IR::Program& program) {
+ if (program.shared_memory_size == 0) {
+ return;
+ }
+ const auto make{[&](Id element_type, u32 element_size) {
+ const u32 num_elements{Common::DivCeil(program.shared_memory_size, element_size)};
+ const Id array_type{TypeArray(element_type, Const(num_elements))};
+ Decorate(array_type, spv::Decoration::ArrayStride, element_size);
+
+ const Id struct_type{TypeStruct(array_type)};
+ MemberDecorate(struct_type, 0U, spv::Decoration::Offset, 0U);
+ Decorate(struct_type, spv::Decoration::Block);
+
+ const Id pointer{TypePointer(spv::StorageClass::Workgroup, struct_type)};
+ const Id element_pointer{TypePointer(spv::StorageClass::Workgroup, element_type)};
+ const Id variable{AddGlobalVariable(pointer, spv::StorageClass::Workgroup)};
+ Decorate(variable, spv::Decoration::Aliased);
+ interfaces.push_back(variable);
+
+ return std::make_tuple(variable, element_pointer, pointer);
+ }};
+ if (profile.support_explicit_workgroup_layout) {
+ AddExtension("SPV_KHR_workgroup_memory_explicit_layout");
+ AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR);
+ if (program.info.uses_int8) {
+ AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR);
+ std::tie(shared_memory_u8, shared_u8, std::ignore) = make(U8, 1);
+ }
+ if (program.info.uses_int16) {
+ AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR);
+ std::tie(shared_memory_u16, shared_u16, std::ignore) = make(U16, 2);
+ }
+ if (program.info.uses_int64) {
+ std::tie(shared_memory_u64, shared_u64, std::ignore) = make(U64, 8);
+ }
+ std::tie(shared_memory_u32, shared_u32, shared_memory_u32_type) = make(U32[1], 4);
+ std::tie(shared_memory_u32x2, shared_u32x2, std::ignore) = make(U32[2], 8);
+ std::tie(shared_memory_u32x4, shared_u32x4, std::ignore) = make(U32[4], 16);
+ return;
+ }
+ const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)};
+ const Id type{TypeArray(U32[1], Const(num_elements))};
+ shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type);
+
+ shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
+ shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup);
+ interfaces.push_back(shared_memory_u32);
+
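+ // Without the explicit workgroup layout extension, 8-bit and 16-bit shared memory stores
+ // are emulated by bit-field-inserting into the containing 32-bit word inside a
+ // compare-and-exchange retry loop.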
+ const Id func_type{TypeFunction(void_id, U32[1], U32[1])};
+ const auto make_function{[&](u32 mask, u32 size) {
+ const Id loop_header{OpLabel()};
+ const Id continue_block{OpLabel()};
+ const Id merge_block{OpLabel()};
+
+ const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type)};
+ const Id offset{OpFunctionParameter(U32[1])};
+ const Id insert_value{OpFunctionParameter(U32[1])};
+ AddLabel();
+ OpBranch(loop_header);
+
+ AddLabel(loop_header);
+ const Id word_offset{OpShiftRightArithmetic(U32[1], offset, Const(2U))};
+ const Id shift_offset{OpShiftLeftLogical(U32[1], offset, Const(3U))};
+ const Id bit_offset{OpBitwiseAnd(U32[1], shift_offset, Const(mask))};
+ const Id count{Const(size)};
+ OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone);
+ OpBranch(continue_block);
+
+ AddLabel(continue_block);
+ const Id word_pointer{OpAccessChain(shared_u32, shared_memory_u32, word_offset)};
+ const Id old_value{OpLoad(U32[1], word_pointer)};
+ const Id new_value{OpBitFieldInsert(U32[1], old_value, insert_value, bit_offset, count)};
+ const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, Const(1U), u32_zero_value,
+ u32_zero_value, new_value, old_value)};
+ const Id success{OpIEqual(U1, atomic_res, old_value)};
+ OpBranchConditional(success, merge_block, loop_header);
+
+ AddLabel(merge_block);
+ OpReturn();
+ OpFunctionEnd();
+ return func;
+ }};
+ if (program.info.uses_int8) {
+ shared_store_u8_func = make_function(24, 8);
+ }
+ if (program.info.uses_int16) {
+ shared_store_u16_func = make_function(16, 16);
+ }
+}
+
+void EmitContext::DefineSharedMemoryFunctions(const IR::Program& program) {
+ if (program.info.uses_shared_increment) {
+ increment_cas_shared = CasLoop(*this, Operation::Increment, shared_memory_u32_type,
+ shared_u32, U32[1], U32[1], spv::Scope::Workgroup);
+ }
+ if (program.info.uses_shared_decrement) {
+ decrement_cas_shared = CasLoop(*this, Operation::Decrement, shared_memory_u32_type,
+ shared_u32, U32[1], U32[1], spv::Scope::Workgroup);
+ }
+}
+
+void EmitContext::DefineAttributeMemAccess(const Info& info) {
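+ // Dynamically indexed attribute accesses cannot be resolved at compile time, so they are
+ // lowered to helper functions that switch on the attribute word index at runtime.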
+ const auto make_load{[&] {
+ const bool is_array{stage == Stage::Geometry};
+ const Id end_block{OpLabel()};
+ const Id default_label{OpLabel()};
+
+ const Id func_type_load{is_array ? TypeFunction(F32[1], U32[1], U32[1])
+ : TypeFunction(F32[1], U32[1])};
+ const Id func{OpFunction(F32[1], spv::FunctionControlMask::MaskNone, func_type_load)};
+ const Id offset{OpFunctionParameter(U32[1])};
+ const Id vertex{is_array ? OpFunctionParameter(U32[1]) : Id{}};
+
+ AddLabel();
+ const Id base_index{OpShiftRightArithmetic(U32[1], offset, Const(2U))};
+ const Id masked_index{OpBitwiseAnd(U32[1], base_index, Const(3U))};
+ const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))};
+ std::vector<Sirit::Literal> literals;
+ std::vector<Id> labels;
+ if (info.loads.AnyComponent(IR::Attribute::PositionX)) {
+ literals.push_back(static_cast<u32>(IR::Attribute::PositionX) >> 2);
+ labels.push_back(OpLabel());
+ }
+ const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2;
+ for (u32 index = 0; index < static_cast<u32>(IR::NUM_GENERICS); ++index) {
+ if (!info.loads.Generic(index)) {
+ continue;
+ }
+ literals.push_back(base_attribute_value + index);
+ labels.push_back(OpLabel());
+ }
+ OpSelectionMerge(end_block, spv::SelectionControlMask::MaskNone);
+ OpSwitch(compare_index, default_label, literals, labels);
+ AddLabel(default_label);
+ OpReturnValue(Const(0.0f));
+ size_t label_index{0};
+ if (info.loads.AnyComponent(IR::Attribute::PositionX)) {
+ AddLabel(labels[label_index]);
+ const Id pointer{is_array
+ ? OpAccessChain(input_f32, input_position, vertex, masked_index)
+ : OpAccessChain(input_f32, input_position, masked_index)};
+ const Id result{OpLoad(F32[1], pointer)};
+ OpReturnValue(result);
+ ++label_index;
+ }
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ if (!info.loads.Generic(index)) {
+ continue;
+ }
+ AddLabel(labels[label_index]);
+ const auto type{AttrTypes(*this, static_cast<u32>(index))};
+ if (!type) {
+ OpReturnValue(Const(0.0f));
+ ++label_index;
+ continue;
+ }
+ const Id generic_id{input_generics.at(index)};
+ const Id pointer{is_array
+ ? OpAccessChain(type->pointer, generic_id, vertex, masked_index)
+ : OpAccessChain(type->pointer, generic_id, masked_index)};
+ const Id value{OpLoad(type->id, pointer)};
+ const Id result{type->needs_cast ? OpBitcast(F32[1], value) : value};
+ OpReturnValue(result);
+ ++label_index;
+ }
+ AddLabel(end_block);
+ OpUnreachable();
+ OpFunctionEnd();
+ return func;
+ }};
+ const auto make_store{[&] {
+ const Id end_block{OpLabel()};
+ const Id default_label{OpLabel()};
+
+ const Id func_type_store{TypeFunction(void_id, U32[1], F32[1])};
+ const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type_store)};
+ const Id offset{OpFunctionParameter(U32[1])};
+ const Id store_value{OpFunctionParameter(F32[1])};
+ AddLabel();
+ const Id base_index{OpShiftRightArithmetic(U32[1], offset, Const(2U))};
+ const Id masked_index{OpBitwiseAnd(U32[1], base_index, Const(3U))};
+ const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))};
+ std::vector<Sirit::Literal> literals;
+ std::vector<Id> labels;
+ if (info.stores.AnyComponent(IR::Attribute::PositionX)) {
+ literals.push_back(static_cast<u32>(IR::Attribute::PositionX) >> 2);
+ labels.push_back(OpLabel());
+ }
+ const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2;
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ if (!info.stores.Generic(index)) {
+ continue;
+ }
+ literals.push_back(base_attribute_value + static_cast<u32>(index));
+ labels.push_back(OpLabel());
+ }
+ if (info.stores.ClipDistances()) {
+ literals.push_back(static_cast<u32>(IR::Attribute::ClipDistance0) >> 2);
+ labels.push_back(OpLabel());
+ literals.push_back(static_cast<u32>(IR::Attribute::ClipDistance4) >> 2);
+ labels.push_back(OpLabel());
+ }
+ OpSelectionMerge(end_block, spv::SelectionControlMask::MaskNone);
+ OpSwitch(compare_index, default_label, literals, labels);
+ AddLabel(default_label);
+ OpReturn();
+ size_t label_index{0};
+ if (info.stores.AnyComponent(IR::Attribute::PositionX)) {
+ AddLabel(labels[label_index]);
+ const Id pointer{OpAccessChain(output_f32, output_position, masked_index)};
+ OpStore(pointer, store_value);
+ OpReturn();
+ ++label_index;
+ }
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ if (!info.stores.Generic(index)) {
+ continue;
+ }
+ if (output_generics[index][0].num_components != 4) {
+ throw NotImplementedException("Physical stores and transform feedbacks");
+ }
+ AddLabel(labels[label_index]);
+ const Id generic_id{output_generics[index][0].id};
+ const Id pointer{OpAccessChain(output_f32, generic_id, masked_index)};
+ OpStore(pointer, store_value);
+ OpReturn();
+ ++label_index;
+ }
+ if (info.stores.ClipDistances()) {
+ AddLabel(labels[label_index]);
+ const Id pointer{OpAccessChain(output_f32, clip_distances, masked_index)};
+ OpStore(pointer, store_value);
+ OpReturn();
+ ++label_index;
+ AddLabel(labels[label_index]);
+ const Id fixed_index{OpIAdd(U32[1], masked_index, Const(4U))};
+ const Id pointer2{OpAccessChain(output_f32, clip_distances, fixed_index)};
+ OpStore(pointer2, store_value);
+ OpReturn();
+ ++label_index;
+ }
+ AddLabel(end_block);
+ OpUnreachable();
+ OpFunctionEnd();
+ return func;
+ }};
+ if (info.loads_indexed_attributes) {
+ indexed_load_func = make_load();
+ }
+ if (info.stores_indexed_attributes) {
+ indexed_store_func = make_store();
+ }
+}
+
+void EmitContext::DefineGlobalMemoryFunctions(const Info& info) {
+ if (!info.uses_global_memory || !profile.support_int64) {
+ return;
+ }
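+ // Each helper takes a 64-bit address, compares it against every tracked SSBO's base
+ // address and size (read from the constant buffer slots recorded in its descriptor), and
+ // forwards the access to the matching storage buffer when the address falls in range.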
+ using DefPtr = Id StorageDefinitions::*;
+ const Id zero{u32_zero_value};
+ const auto define_body{[&](DefPtr ssbo_member, Id addr, Id element_pointer, u32 shift,
+ auto&& callback) {
+ AddLabel();
+ const size_t num_buffers{info.storage_buffers_descriptors.size()};
+ for (size_t index = 0; index < num_buffers; ++index) {
+ if (!info.nvn_buffer_used[index]) {
+ continue;
+ }
+ const auto& ssbo{info.storage_buffers_descriptors[index]};
+ const Id ssbo_addr_cbuf_offset{Const(ssbo.cbuf_offset / 8)};
+ const Id ssbo_size_cbuf_offset{Const(ssbo.cbuf_offset / 4 + 2)};
+ const Id ssbo_addr_pointer{OpAccessChain(
+ uniform_types.U32x2, cbufs[ssbo.cbuf_index].U32x2, zero, ssbo_addr_cbuf_offset)};
+ const Id ssbo_size_pointer{OpAccessChain(uniform_types.U32, cbufs[ssbo.cbuf_index].U32,
+ zero, ssbo_size_cbuf_offset)};
+
+ const Id ssbo_addr{OpBitcast(U64, OpLoad(U32[2], ssbo_addr_pointer))};
+ const Id ssbo_size{OpUConvert(U64, OpLoad(U32[1], ssbo_size_pointer))};
+ const Id ssbo_end{OpIAdd(U64, ssbo_addr, ssbo_size)};
+ const Id cond{OpLogicalAnd(U1, OpUGreaterThanEqual(U1, addr, ssbo_addr),
+ OpULessThan(U1, addr, ssbo_end))};
+ const Id then_label{OpLabel()};
+ const Id else_label{OpLabel()};
+ OpSelectionMerge(else_label, spv::SelectionControlMask::MaskNone);
+ OpBranchConditional(cond, then_label, else_label);
+ AddLabel(then_label);
+ const Id ssbo_id{ssbos[index].*ssbo_member};
+ const Id ssbo_offset{OpUConvert(U32[1], OpISub(U64, addr, ssbo_addr))};
+ const Id ssbo_index{OpShiftRightLogical(U32[1], ssbo_offset, Const(shift))};
+ const Id ssbo_pointer{OpAccessChain(element_pointer, ssbo_id, zero, ssbo_index)};
+ callback(ssbo_pointer);
+ AddLabel(else_label);
+ }
+ }};
+ const auto define_load{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) {
+ const Id function_type{TypeFunction(type, U64)};
+ const Id func_id{OpFunction(type, spv::FunctionControlMask::MaskNone, function_type)};
+ const Id addr{OpFunctionParameter(U64)};
+ define_body(ssbo_member, addr, element_pointer, shift,
+ [&](Id ssbo_pointer) { OpReturnValue(OpLoad(type, ssbo_pointer)); });
+ OpReturnValue(ConstantNull(type));
+ OpFunctionEnd();
+ return func_id;
+ }};
+ const auto define_write{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) {
+ const Id function_type{TypeFunction(void_id, U64, type)};
+ const Id func_id{OpFunction(void_id, spv::FunctionControlMask::MaskNone, function_type)};
+ const Id addr{OpFunctionParameter(U64)};
+ const Id data{OpFunctionParameter(type)};
+ define_body(ssbo_member, addr, element_pointer, shift, [&](Id ssbo_pointer) {
+ OpStore(ssbo_pointer, data);
+ OpReturn();
+ });
+ OpReturn();
+ OpFunctionEnd();
+ return func_id;
+ }};
+ const auto define{
+ [&](DefPtr ssbo_member, const StorageTypeDefinition& type_def, Id type, size_t size) {
+ const Id element_type{type_def.element};
+ const u32 shift{static_cast<u32>(std::countr_zero(size))};
+ const Id load_func{define_load(ssbo_member, element_type, type, shift)};
+ const Id write_func{define_write(ssbo_member, element_type, type, shift)};
+ return std::make_pair(load_func, write_func);
+ }};
+ std::tie(load_global_func_u32, write_global_func_u32) =
+ define(&StorageDefinitions::U32, storage_types.U32, U32[1], sizeof(u32));
+ std::tie(load_global_func_u32x2, write_global_func_u32x2) =
+ define(&StorageDefinitions::U32x2, storage_types.U32x2, U32[2], sizeof(u32[2]));
+ std::tie(load_global_func_u32x4, write_global_func_u32x4) =
+ define(&StorageDefinitions::U32x4, storage_types.U32x4, U32[4], sizeof(u32[4]));
+}
+
+void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
+ if (info.constant_buffer_descriptors.empty()) {
+ return;
+ }
+ if (!profile.support_descriptor_aliasing) {
+ DefineConstBuffers(*this, info, &UniformDefinitions::U32x4, binding, U32[4], 'u',
+ sizeof(u32[4]));
+ for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) {
+ binding += desc.count;
+ }
+ return;
+ }
+ IR::Type types{info.used_constant_buffer_types};
+ if (True(types & IR::Type::U8)) {
+ if (profile.support_int8) {
+ DefineConstBuffers(*this, info, &UniformDefinitions::U8, binding, U8, 'u', sizeof(u8));
+ DefineConstBuffers(*this, info, &UniformDefinitions::S8, binding, S8, 's', sizeof(s8));
+ } else {
+ types |= IR::Type::U32;
+ }
+ }
+ if (True(types & IR::Type::U16)) {
+ if (profile.support_int16) {
+ DefineConstBuffers(*this, info, &UniformDefinitions::U16, binding, U16, 'u',
+ sizeof(u16));
+ DefineConstBuffers(*this, info, &UniformDefinitions::S16, binding, S16, 's',
+ sizeof(s16));
+ } else {
+ types |= IR::Type::U32;
+ }
+ }
+ if (True(types & IR::Type::U32)) {
+ DefineConstBuffers(*this, info, &UniformDefinitions::U32, binding, U32[1], 'u',
+ sizeof(u32));
+ }
+ if (True(types & IR::Type::F32)) {
+ DefineConstBuffers(*this, info, &UniformDefinitions::F32, binding, F32[1], 'f',
+ sizeof(f32));
+ }
+ if (True(types & IR::Type::U32x2)) {
+ DefineConstBuffers(*this, info, &UniformDefinitions::U32x2, binding, U32[2], 'u',
+ sizeof(u32[2]));
+ }
+ binding += static_cast<u32>(info.constant_buffer_descriptors.size());
+}
+
+void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) {
+ if (info.storage_buffers_descriptors.empty()) {
+ return;
+ }
+ AddExtension("SPV_KHR_storage_buffer_storage_class");
+
+ const IR::Type used_types{profile.support_descriptor_aliasing ? info.used_storage_buffer_types
+ : IR::Type::U32};
+ if (profile.support_int8 && True(used_types & IR::Type::U8)) {
+ DefineSsbos(*this, storage_types.U8, &StorageDefinitions::U8, info, binding, U8,
+ sizeof(u8));
+ DefineSsbos(*this, storage_types.S8, &StorageDefinitions::S8, info, binding, S8,
+ sizeof(u8));
+ }
+ if (profile.support_int16 && True(used_types & IR::Type::U16)) {
+ DefineSsbos(*this, storage_types.U16, &StorageDefinitions::U16, info, binding, U16,
+ sizeof(u16));
+ DefineSsbos(*this, storage_types.S16, &StorageDefinitions::S16, info, binding, S16,
+ sizeof(u16));
+ }
+ if (True(used_types & IR::Type::U32)) {
+ DefineSsbos(*this, storage_types.U32, &StorageDefinitions::U32, info, binding, U32[1],
+ sizeof(u32));
+ }
+ if (True(used_types & IR::Type::F32)) {
+ DefineSsbos(*this, storage_types.F32, &StorageDefinitions::F32, info, binding, F32[1],
+ sizeof(f32));
+ }
+ if (True(used_types & IR::Type::U64)) {
+ DefineSsbos(*this, storage_types.U64, &StorageDefinitions::U64, info, binding, U64,
+ sizeof(u64));
+ }
+ if (True(used_types & IR::Type::U32x2)) {
+ DefineSsbos(*this, storage_types.U32x2, &StorageDefinitions::U32x2, info, binding, U32[2],
+ sizeof(u32[2]));
+ }
+ if (True(used_types & IR::Type::U32x4)) {
+ DefineSsbos(*this, storage_types.U32x4, &StorageDefinitions::U32x4, info, binding, U32[4],
+ sizeof(u32[4]));
+ }
+ for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) {
+ binding += desc.count;
+ }
+ const bool needs_function{
+ info.uses_global_increment || info.uses_global_decrement || info.uses_atomic_f32_add ||
+ info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max ||
+ info.uses_atomic_f32x2_add || info.uses_atomic_f32x2_min || info.uses_atomic_f32x2_max};
+ if (needs_function) {
+ AddCapability(spv::Capability::VariablePointersStorageBuffer);
+ }
+ if (info.uses_global_increment) {
+ increment_cas_ssbo = CasLoop(*this, Operation::Increment, storage_types.U32.array,
+ storage_types.U32.element, U32[1], U32[1], spv::Scope::Device);
+ }
+ if (info.uses_global_decrement) {
+ decrement_cas_ssbo = CasLoop(*this, Operation::Decrement, storage_types.U32.array,
+ storage_types.U32.element, U32[1], U32[1], spv::Scope::Device);
+ }
+ if (info.uses_atomic_f32_add) {
+ f32_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array,
+ storage_types.U32.element, F32[1], U32[1], spv::Scope::Device);
+ }
+ if (info.uses_atomic_f16x2_add) {
+ f16x2_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array,
+ storage_types.U32.element, F16[2], F16[2], spv::Scope::Device);
+ }
+ if (info.uses_atomic_f16x2_min) {
+ f16x2_min_cas = CasLoop(*this, Operation::FPMin, storage_types.U32.array,
+ storage_types.U32.element, F16[2], F16[2], spv::Scope::Device);
+ }
+ if (info.uses_atomic_f16x2_max) {
+ f16x2_max_cas = CasLoop(*this, Operation::FPMax, storage_types.U32.array,
+ storage_types.U32.element, F16[2], F16[2], spv::Scope::Device);
+ }
+ if (info.uses_atomic_f32x2_add) {
+ f32x2_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array,
+ storage_types.U32.element, F32[2], F32[2], spv::Scope::Device);
+ }
+ if (info.uses_atomic_f32x2_min) {
+ f32x2_min_cas = CasLoop(*this, Operation::FPMin, storage_types.U32.array,
+ storage_types.U32.element, F32[2], F32[2], spv::Scope::Device);
+ }
+ if (info.uses_atomic_f32x2_max) {
+ f32x2_max_cas = CasLoop(*this, Operation::FPMax, storage_types.U32.array,
+ storage_types.U32.element, F32[2], F32[2], spv::Scope::Device);
+ }
+}
+
+void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) {
+ if (info.texture_buffer_descriptors.empty()) {
+ return;
+ }
+ const spv::ImageFormat format{spv::ImageFormat::Unknown};
+ image_buffer_type = TypeImage(F32[1], spv::Dim::Buffer, 0U, false, false, 1, format);
+ sampled_texture_buffer_type = TypeSampledImage(image_buffer_type);
+
+ const Id type{TypePointer(spv::StorageClass::UniformConstant, sampled_texture_buffer_type)};
+ texture_buffers.reserve(info.texture_buffer_descriptors.size());
+ for (const TextureBufferDescriptor& desc : info.texture_buffer_descriptors) {
+ if (desc.count != 1) {
+ throw NotImplementedException("Array of texture buffers");
+ }
+ const Id id{AddGlobalVariable(type, spv::StorageClass::UniformConstant)};
+ Decorate(id, spv::Decoration::Binding, binding);
+ Decorate(id, spv::Decoration::DescriptorSet, 0U);
+ Name(id, NameOf(stage, desc, "texbuf"));
+ texture_buffers.push_back({
+ .id = id,
+ .count = desc.count,
+ });
+ if (profile.supported_spirv >= 0x00010400) {
+ interfaces.push_back(id);
+ }
+ ++binding;
+ }
+}
+
+void EmitContext::DefineImageBuffers(const Info& info, u32& binding) {
+ image_buffers.reserve(info.image_buffer_descriptors.size());
+ for (const ImageBufferDescriptor& desc : info.image_buffer_descriptors) {
+ if (desc.count != 1) {
+ throw NotImplementedException("Array of image buffers");
+ }
+ const spv::ImageFormat format{GetImageFormat(desc.format)};
+ const Id image_type{TypeImage(U32[1], spv::Dim::Buffer, false, false, false, 2, format)};
+ const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
+ const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
+ Decorate(id, spv::Decoration::Binding, binding);
+ Decorate(id, spv::Decoration::DescriptorSet, 0U);
+ Name(id, NameOf(stage, desc, "imgbuf"));
+ image_buffers.push_back({
+ .id = id,
+ .image_type = image_type,
+ .count = desc.count,
+ });
+ if (profile.supported_spirv >= 0x00010400) {
+ interfaces.push_back(id);
+ }
+ ++binding;
+ }
+}
+
+void EmitContext::DefineTextures(const Info& info, u32& binding) {
+ textures.reserve(info.texture_descriptors.size());
+ for (const TextureDescriptor& desc : info.texture_descriptors) {
+ const Id image_type{ImageType(*this, desc)};
+ const Id sampled_type{TypeSampledImage(image_type)};
+ const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, sampled_type)};
+ const Id desc_type{DescType(*this, sampled_type, pointer_type, desc.count)};
+ const Id id{AddGlobalVariable(desc_type, spv::StorageClass::UniformConstant)};
+ Decorate(id, spv::Decoration::Binding, binding);
+ Decorate(id, spv::Decoration::DescriptorSet, 0U);
+ Name(id, NameOf(stage, desc, "tex"));
+ textures.push_back({
+ .id = id,
+ .sampled_type = sampled_type,
+ .pointer_type = pointer_type,
+ .image_type = image_type,
+ .count = desc.count,
+ });
+ if (profile.supported_spirv >= 0x00010400) {
+ interfaces.push_back(id);
+ }
+ ++binding;
+ }
+ if (info.uses_atomic_image_u32) {
+ image_u32 = TypePointer(spv::StorageClass::Image, U32[1]);
+ }
+}
+
+void EmitContext::DefineImages(const Info& info, u32& binding) {
+ images.reserve(info.image_descriptors.size());
+ for (const ImageDescriptor& desc : info.image_descriptors) {
+ if (desc.count != 1) {
+ throw NotImplementedException("Array of images");
+ }
+ const Id image_type{ImageType(*this, desc)};
+ const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
+ const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
+ Decorate(id, spv::Decoration::Binding, binding);
+ Decorate(id, spv::Decoration::DescriptorSet, 0U);
+ Name(id, NameOf(stage, desc, "img"));
+ images.push_back({
+ .id = id,
+ .image_type = image_type,
+ .count = desc.count,
+ });
+ if (profile.supported_spirv >= 0x00010400) {
+ interfaces.push_back(id);
+ }
+ ++binding;
+ }
+}
+
+void EmitContext::DefineInputs(const IR::Program& program) {
+ const Info& info{program.info};
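+    // Treat attributes read directly and attributes forwarded through geometry passthrough
+    // as inputs of this stage.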
+ const VaryingState loads{info.loads.mask | info.passthrough.mask};
+
+ if (info.uses_workgroup_id) {
+ workgroup_id = DefineInput(*this, U32[3], false, spv::BuiltIn::WorkgroupId);
+ }
+ if (info.uses_local_invocation_id) {
+ local_invocation_id = DefineInput(*this, U32[3], false, spv::BuiltIn::LocalInvocationId);
+ }
+ if (info.uses_invocation_id) {
+ invocation_id = DefineInput(*this, U32[1], false, spv::BuiltIn::InvocationId);
+ }
+ if (info.uses_sample_id) {
+ sample_id = DefineInput(*this, U32[1], false, spv::BuiltIn::SampleId);
+ }
+ if (info.uses_is_helper_invocation) {
+ is_helper_invocation = DefineInput(*this, U1, false, spv::BuiltIn::HelperInvocation);
+ }
+ if (info.uses_subgroup_mask) {
+ subgroup_mask_eq = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupEqMaskKHR);
+ subgroup_mask_lt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLtMaskKHR);
+ subgroup_mask_le = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLeMaskKHR);
+ subgroup_mask_gt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGtMaskKHR);
+ subgroup_mask_ge = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGeMaskKHR);
+ }
+ if (info.uses_subgroup_invocation_id || info.uses_subgroup_shuffles ||
+ (profile.warp_size_potentially_larger_than_guest &&
+ (info.uses_subgroup_vote || info.uses_subgroup_mask))) {
+ subgroup_local_invocation_id =
+ DefineInput(*this, U32[1], false, spv::BuiltIn::SubgroupLocalInvocationId);
+ }
+ if (info.uses_fswzadd) {
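+        // Constant sign/zero lookup tables consumed by the FSWZADD implementation.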
+ const Id f32_one{Const(1.0f)};
+ const Id f32_minus_one{Const(-1.0f)};
+ const Id f32_zero{Const(0.0f)};
+ fswzadd_lut_a = ConstantComposite(F32[4], f32_minus_one, f32_one, f32_minus_one, f32_zero);
+ fswzadd_lut_b =
+ ConstantComposite(F32[4], f32_minus_one, f32_minus_one, f32_one, f32_minus_one);
+ }
+ if (loads[IR::Attribute::PrimitiveId]) {
+ primitive_id = DefineInput(*this, U32[1], false, spv::BuiltIn::PrimitiveId);
+ }
+ if (loads.AnyComponent(IR::Attribute::PositionX)) {
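+        // Fragment shaders read the position through FragCoord; every other stage uses Position.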
+        const bool is_fragment{stage == Stage::Fragment};
+        const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::FragCoord : spv::BuiltIn::Position};
+ input_position = DefineInput(*this, F32[4], true, built_in);
+ if (profile.support_geometry_shader_passthrough) {
+ if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) {
+ Decorate(input_position, spv::Decoration::PassthroughNV);
+ }
+ }
+ }
+ if (loads[IR::Attribute::InstanceId]) {
+ if (profile.support_vertex_instance_id) {
+ instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId);
+ } else {
+ instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex);
+ base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance);
+ }
+ }
+ if (loads[IR::Attribute::VertexId]) {
+ if (profile.support_vertex_instance_id) {
+ vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId);
+ } else {
+ vertex_index = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexIndex);
+ base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
+ }
+ }
+ if (loads[IR::Attribute::FrontFace]) {
+ front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing);
+ }
+ if (loads[IR::Attribute::PointSpriteS] || loads[IR::Attribute::PointSpriteT]) {
+ point_coord = DefineInput(*this, F32[2], true, spv::BuiltIn::PointCoord);
+ }
+ if (loads[IR::Attribute::TessellationEvaluationPointU] ||
+ loads[IR::Attribute::TessellationEvaluationPointV]) {
+ tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord);
+ }
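+    // Declare the generic input attributes written by the previous stage and read by this one.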
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ const AttributeType input_type{runtime_info.generic_input_types[index]};
+ if (!runtime_info.previous_stage_stores.Generic(index)) {
+ continue;
+ }
+ if (!loads.Generic(index)) {
+ continue;
+ }
+ if (input_type == AttributeType::Disabled) {
+ continue;
+ }
+ const Id type{GetAttributeType(*this, input_type)};
+ const Id id{DefineInput(*this, type, true)};
+ Decorate(id, spv::Decoration::Location, static_cast<u32>(index));
+ Name(id, fmt::format("in_attr{}", index));
+ input_generics[index] = id;
+
+ if (info.passthrough.Generic(index) && profile.support_geometry_shader_passthrough) {
+ Decorate(id, spv::Decoration::PassthroughNV);
+ }
+ if (stage != Stage::Fragment) {
+ continue;
+ }
+ switch (info.interpolation[index]) {
+ case Interpolation::Smooth:
+ // Default
+ // Decorate(id, spv::Decoration::Smooth);
+ break;
+ case Interpolation::NoPerspective:
+ Decorate(id, spv::Decoration::NoPerspective);
+ break;
+ case Interpolation::Flat:
+ Decorate(id, spv::Decoration::Flat);
+ break;
+ }
+ }
+ if (stage == Stage::TessellationEval) {
+ for (size_t index = 0; index < info.uses_patches.size(); ++index) {
+ if (!info.uses_patches[index]) {
+ continue;
+ }
+ const Id id{DefineInput(*this, F32[4], false)};
+ Decorate(id, spv::Decoration::Patch);
+ Decorate(id, spv::Decoration::Location, static_cast<u32>(index));
+ patches[index] = id;
+ }
+ }
+}
+
+void EmitContext::DefineOutputs(const IR::Program& program) {
+ const Info& info{program.info};
+ const std::optional<u32> invocations{program.invocations};
+ if (info.stores.AnyComponent(IR::Attribute::PositionX) || stage == Stage::VertexB) {
+ output_position = DefineOutput(*this, F32[4], invocations, spv::BuiltIn::Position);
+ }
+ if (info.stores[IR::Attribute::PointSize] || runtime_info.fixed_state_point_size) {
+ if (stage == Stage::Fragment) {
+ throw NotImplementedException("Storing PointSize in fragment stage");
+ }
+ output_point_size = DefineOutput(*this, F32[1], invocations, spv::BuiltIn::PointSize);
+ }
+ if (info.stores.ClipDistances()) {
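+        // All eight clip distances are declared even when only some of them are written.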
+ if (stage == Stage::Fragment) {
+ throw NotImplementedException("Storing ClipDistance in fragment stage");
+ }
+ const Id type{TypeArray(F32[1], Const(8U))};
+ clip_distances = DefineOutput(*this, type, invocations, spv::BuiltIn::ClipDistance);
+ }
+ if (info.stores[IR::Attribute::Layer] &&
+ (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) {
+ if (stage == Stage::Fragment) {
+ throw NotImplementedException("Storing Layer in fragment stage");
+ }
+ layer = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::Layer);
+ }
+ if (info.stores[IR::Attribute::ViewportIndex] &&
+ (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) {
+ if (stage == Stage::Fragment) {
+ throw NotImplementedException("Storing ViewportIndex in fragment stage");
+ }
+ viewport_index = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::ViewportIndex);
+ }
+ if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) {
+        viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1U)), std::nullopt,
+ spv::BuiltIn::ViewportMaskNV);
+ }
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ if (info.stores.Generic(index)) {
+ DefineGenericOutput(*this, index, invocations);
+ }
+ }
+ switch (stage) {
+ case Stage::TessellationControl:
+ if (info.stores_tess_level_outer) {
+ const Id type{TypeArray(F32[1], Const(4U))};
+ output_tess_level_outer =
+ DefineOutput(*this, type, std::nullopt, spv::BuiltIn::TessLevelOuter);
+ Decorate(output_tess_level_outer, spv::Decoration::Patch);
+ }
+ if (info.stores_tess_level_inner) {
+ const Id type{TypeArray(F32[1], Const(2U))};
+ output_tess_level_inner =
+ DefineOutput(*this, type, std::nullopt, spv::BuiltIn::TessLevelInner);
+ Decorate(output_tess_level_inner, spv::Decoration::Patch);
+ }
+ for (size_t index = 0; index < info.uses_patches.size(); ++index) {
+ if (!info.uses_patches[index]) {
+ continue;
+ }
+ const Id id{DefineOutput(*this, F32[4], std::nullopt)};
+ Decorate(id, spv::Decoration::Patch);
+ Decorate(id, spv::Decoration::Location, static_cast<u32>(index));
+ patches[index] = id;
+ }
+ break;
+ case Stage::Fragment:
+ for (u32 index = 0; index < 8; ++index) {
+ if (!info.stores_frag_color[index] && !profile.need_declared_frag_colors) {
+ continue;
+ }
+ frag_color[index] = DefineOutput(*this, F32[4], std::nullopt);
+ Decorate(frag_color[index], spv::Decoration::Location, index);
+ Name(frag_color[index], fmt::format("frag_color{}", index));
+ }
+ if (info.stores_frag_depth) {
+ frag_depth = DefineOutput(*this, F32[1], std::nullopt);
+ Decorate(frag_depth, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth);
+ }
+ if (info.stores_sample_mask) {
+ sample_mask = DefineOutput(*this, U32[1], std::nullopt);
+ Decorate(sample_mask, spv::Decoration::BuiltIn, spv::BuiltIn::SampleMask);
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h
new file mode 100644
index 000000000..e277bc358
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -0,0 +1,307 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <string_view>
+
+#include <sirit/sirit.h>
+
+#include "shader_recompiler/backend/bindings.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/profile.h"
+#include "shader_recompiler/runtime_info.h"
+#include "shader_recompiler/shader_info.h"
+
+namespace Shader::Backend::SPIRV {
+
+using Sirit::Id;
+
+class VectorTypes {
+public:
+ void Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name);
+
+ [[nodiscard]] Id operator[](size_t size) const noexcept {
+ return defs[size - 1];
+ }
+
+private:
+ std::array<Id, 4> defs{};
+};
+
+struct TextureDefinition {
+ Id id;
+ Id sampled_type;
+ Id pointer_type;
+ Id image_type;
+ u32 count;
+};
+
+struct TextureBufferDefinition {
+ Id id;
+ u32 count;
+};
+
+struct ImageBufferDefinition {
+ Id id;
+ Id image_type;
+ u32 count;
+};
+
+struct ImageDefinition {
+ Id id;
+ Id image_type;
+ u32 count;
+};
+
+struct UniformDefinitions {
+ Id U8{};
+ Id S8{};
+ Id U16{};
+ Id S16{};
+ Id U32{};
+ Id F32{};
+ Id U32x2{};
+ Id U32x4{};
+};
+
+struct StorageTypeDefinition {
+ Id array{};
+ Id element{};
+};
+
+struct StorageTypeDefinitions {
+ StorageTypeDefinition U8{};
+ StorageTypeDefinition S8{};
+ StorageTypeDefinition U16{};
+ StorageTypeDefinition S16{};
+ StorageTypeDefinition U32{};
+ StorageTypeDefinition U64{};
+ StorageTypeDefinition F32{};
+ StorageTypeDefinition U32x2{};
+ StorageTypeDefinition U32x4{};
+};
+
+struct StorageDefinitions {
+ Id U8{};
+ Id S8{};
+ Id U16{};
+ Id S16{};
+ Id U32{};
+ Id F32{};
+ Id U64{};
+ Id U32x2{};
+ Id U32x4{};
+};
+
+struct GenericElementInfo {
+ Id id{};
+ u32 first_element{};
+ u32 num_components{};
+};
+
+class EmitContext final : public Sirit::Module {
+public:
+ explicit EmitContext(const Profile& profile, const RuntimeInfo& runtime_info,
+ IR::Program& program, Bindings& binding);
+ ~EmitContext();
+
+ [[nodiscard]] Id Def(const IR::Value& value);
+
+ [[nodiscard]] Id BitOffset8(const IR::Value& offset);
+ [[nodiscard]] Id BitOffset16(const IR::Value& offset);
+
+ Id Const(u32 value) {
+ return Constant(U32[1], value);
+ }
+
+ Id Const(u32 element_1, u32 element_2) {
+ return ConstantComposite(U32[2], Const(element_1), Const(element_2));
+ }
+
+ Id Const(u32 element_1, u32 element_2, u32 element_3) {
+ return ConstantComposite(U32[3], Const(element_1), Const(element_2), Const(element_3));
+ }
+
+ Id Const(u32 element_1, u32 element_2, u32 element_3, u32 element_4) {
+ return ConstantComposite(U32[4], Const(element_1), Const(element_2), Const(element_3),
+ Const(element_4));
+ }
+
+ Id SConst(s32 value) {
+ return Constant(S32[1], value);
+ }
+
+ Id SConst(s32 element_1, s32 element_2) {
+ return ConstantComposite(S32[2], SConst(element_1), SConst(element_2));
+ }
+
+ Id SConst(s32 element_1, s32 element_2, s32 element_3) {
+ return ConstantComposite(S32[3], SConst(element_1), SConst(element_2), SConst(element_3));
+ }
+
+ Id SConst(s32 element_1, s32 element_2, s32 element_3, s32 element_4) {
+ return ConstantComposite(S32[4], SConst(element_1), SConst(element_2), SConst(element_3),
+ SConst(element_4));
+ }
+
+ Id Const(f32 value) {
+ return Constant(F32[1], value);
+ }
+
+ const Profile& profile;
+ const RuntimeInfo& runtime_info;
+ Stage stage{};
+
+ Id void_id{};
+ Id U1{};
+ Id U8{};
+ Id S8{};
+ Id U16{};
+ Id S16{};
+ Id U64{};
+ VectorTypes F32;
+ VectorTypes U32;
+ VectorTypes S32;
+ VectorTypes F16;
+ VectorTypes F64;
+
+ Id true_value{};
+ Id false_value{};
+ Id u32_zero_value{};
+ Id f32_zero_value{};
+
+ UniformDefinitions uniform_types;
+ StorageTypeDefinitions storage_types;
+
+ Id private_u32{};
+
+ Id shared_u8{};
+ Id shared_u16{};
+ Id shared_u32{};
+ Id shared_u64{};
+ Id shared_u32x2{};
+ Id shared_u32x4{};
+
+ Id input_f32{};
+ Id input_u32{};
+ Id input_s32{};
+
+ Id output_f32{};
+ Id output_u32{};
+
+ Id image_buffer_type{};
+ Id sampled_texture_buffer_type{};
+ Id image_u32{};
+
+ std::array<UniformDefinitions, Info::MAX_CBUFS> cbufs{};
+ std::array<StorageDefinitions, Info::MAX_SSBOS> ssbos{};
+ std::vector<TextureBufferDefinition> texture_buffers;
+ std::vector<ImageBufferDefinition> image_buffers;
+ std::vector<TextureDefinition> textures;
+ std::vector<ImageDefinition> images;
+
+ Id workgroup_id{};
+ Id local_invocation_id{};
+ Id invocation_id{};
+ Id sample_id{};
+ Id is_helper_invocation{};
+ Id subgroup_local_invocation_id{};
+ Id subgroup_mask_eq{};
+ Id subgroup_mask_lt{};
+ Id subgroup_mask_le{};
+ Id subgroup_mask_gt{};
+ Id subgroup_mask_ge{};
+ Id instance_id{};
+ Id instance_index{};
+ Id base_instance{};
+ Id vertex_id{};
+ Id vertex_index{};
+ Id base_vertex{};
+ Id front_face{};
+ Id point_coord{};
+ Id tess_coord{};
+ Id clip_distances{};
+ Id layer{};
+ Id viewport_index{};
+ Id viewport_mask{};
+ Id primitive_id{};
+
+ Id fswzadd_lut_a{};
+ Id fswzadd_lut_b{};
+
+ Id indexed_load_func{};
+ Id indexed_store_func{};
+
+ Id local_memory{};
+
+ Id shared_memory_u8{};
+ Id shared_memory_u16{};
+ Id shared_memory_u32{};
+ Id shared_memory_u64{};
+ Id shared_memory_u32x2{};
+ Id shared_memory_u32x4{};
+
+ Id shared_memory_u32_type{};
+
+ Id shared_store_u8_func{};
+ Id shared_store_u16_func{};
+ Id increment_cas_shared{};
+ Id increment_cas_ssbo{};
+ Id decrement_cas_shared{};
+ Id decrement_cas_ssbo{};
+ Id f32_add_cas{};
+ Id f16x2_add_cas{};
+ Id f16x2_min_cas{};
+ Id f16x2_max_cas{};
+ Id f32x2_add_cas{};
+ Id f32x2_min_cas{};
+ Id f32x2_max_cas{};
+
+ Id load_global_func_u32{};
+ Id load_global_func_u32x2{};
+ Id load_global_func_u32x4{};
+ Id write_global_func_u32{};
+ Id write_global_func_u32x2{};
+ Id write_global_func_u32x4{};
+
+ Id input_position{};
+ std::array<Id, 32> input_generics{};
+
+ Id output_point_size{};
+ Id output_position{};
+ std::array<std::array<GenericElementInfo, 4>, 32> output_generics{};
+
+ Id output_tess_level_outer{};
+ Id output_tess_level_inner{};
+ std::array<Id, 30> patches{};
+
+ std::array<Id, 8> frag_color{};
+ Id sample_mask{};
+ Id frag_depth{};
+
+ std::vector<Id> interfaces;
+
+private:
+ void DefineCommonTypes(const Info& info);
+ void DefineCommonConstants();
+ void DefineInterfaces(const IR::Program& program);
+ void DefineLocalMemory(const IR::Program& program);
+ void DefineSharedMemory(const IR::Program& program);
+ void DefineSharedMemoryFunctions(const IR::Program& program);
+ void DefineConstantBuffers(const Info& info, u32& binding);
+ void DefineStorageBuffers(const Info& info, u32& binding);
+ void DefineTextureBuffers(const Info& info, u32& binding);
+ void DefineImageBuffers(const Info& info, u32& binding);
+ void DefineTextures(const Info& info, u32& binding);
+ void DefineImages(const Info& info, u32& binding);
+ void DefineAttributeMemAccess(const Info& info);
+ void DefineGlobalMemoryFunctions(const Info& info);
+
+ void DefineInputs(const IR::Program& program);
+ void DefineOutputs(const IR::Program& program);
+};
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
new file mode 100644
index 000000000..d7a86e270
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -0,0 +1,541 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <span>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "common/settings.h"
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/program.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
+template <class Func>
+struct FuncTraits {};
+
+template <class ReturnType_, class... Args>
+struct FuncTraits<ReturnType_ (*)(Args...)> {
+ using ReturnType = ReturnType_;
+
+ static constexpr size_t NUM_ARGS = sizeof...(Args);
+
+ template <size_t I>
+ using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
+};
+
+template <auto func, typename... Args>
+void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) {
+ inst->SetDefinition<Id>(func(ctx, std::forward<Args>(args)...));
+}
+
+template <typename ArgType>
+ArgType Arg(EmitContext& ctx, const IR::Value& arg) {
+ if constexpr (std::is_same_v<ArgType, Id>) {
+ return ctx.Def(arg);
+ } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) {
+ return arg;
+ } else if constexpr (std::is_same_v<ArgType, u32>) {
+ return arg.U32();
+ } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) {
+ return arg.Attribute();
+ } else if constexpr (std::is_same_v<ArgType, IR::Patch>) {
+ return arg.Patch();
+ } else if constexpr (std::is_same_v<ArgType, IR::Reg>) {
+ return arg.Reg();
+ }
+}
+
+template <auto func, bool is_first_arg_inst, size_t... I>
+void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) {
+ using Traits = FuncTraits<decltype(func)>;
+ if constexpr (std::is_same_v<typename Traits::ReturnType, Id>) {
+ if constexpr (is_first_arg_inst) {
+ SetDefinition<func>(
+ ctx, inst, inst,
+ Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
+ } else {
+ SetDefinition<func>(
+ ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...);
+ }
+ } else {
+ if constexpr (is_first_arg_inst) {
+ func(ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
+ } else {
+ func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...);
+ }
+ }
+}
+
+template <auto func>
+void Invoke(EmitContext& ctx, IR::Inst* inst) {
+ using Traits = FuncTraits<decltype(func)>;
+ static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments");
+ if constexpr (Traits::NUM_ARGS == 1) {
+ Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{});
+ } else {
+ using FirstArgType = typename Traits::template ArgType<1>;
+ static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst*>;
+ using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>;
+ Invoke<func, is_first_arg_inst>(ctx, inst, Indices{});
+ }
+}
+
+void EmitInst(EmitContext& ctx, IR::Inst* inst) {
+ switch (inst->GetOpcode()) {
+#define OPCODE(name, result_type, ...) \
+ case IR::Opcode::name: \
+ return Invoke<&Emit##name>(ctx, inst);
+#include "shader_recompiler/frontend/ir/opcodes.inc"
+#undef OPCODE
+ }
+ throw LogicError("Invalid opcode {}", inst->GetOpcode());
+}
+
+Id TypeId(const EmitContext& ctx, IR::Type type) {
+ switch (type) {
+ case IR::Type::U1:
+ return ctx.U1;
+ case IR::Type::U32:
+ return ctx.U32[1];
+ default:
+ throw NotImplementedException("Phi node type {}", type);
+ }
+}
+
+void Traverse(EmitContext& ctx, IR::Program& program) {
+ IR::Block* current_block{};
+ for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
+ switch (node.type) {
+ case IR::AbstractSyntaxNode::Type::Block: {
+ const Id label{node.data.block->Definition<Id>()};
+ if (current_block) {
+ ctx.OpBranch(label);
+ }
+ current_block = node.data.block;
+ ctx.AddLabel(label);
+ for (IR::Inst& inst : node.data.block->Instructions()) {
+ EmitInst(ctx, &inst);
+ }
+ break;
+ }
+ case IR::AbstractSyntaxNode::Type::If: {
+ const Id if_label{node.data.if_node.body->Definition<Id>()};
+ const Id endif_label{node.data.if_node.merge->Definition<Id>()};
+ ctx.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone);
+ ctx.OpBranchConditional(ctx.Def(node.data.if_node.cond), if_label, endif_label);
+ break;
+ }
+ case IR::AbstractSyntaxNode::Type::Loop: {
+ const Id body_label{node.data.loop.body->Definition<Id>()};
+ const Id continue_label{node.data.loop.continue_block->Definition<Id>()};
+ const Id endloop_label{node.data.loop.merge->Definition<Id>()};
+
+ ctx.OpLoopMerge(endloop_label, continue_label, spv::LoopControlMask::MaskNone);
+ ctx.OpBranch(body_label);
+ break;
+ }
+ case IR::AbstractSyntaxNode::Type::Break: {
+ const Id break_label{node.data.break_node.merge->Definition<Id>()};
+ const Id skip_label{node.data.break_node.skip->Definition<Id>()};
+ ctx.OpBranchConditional(ctx.Def(node.data.break_node.cond), break_label, skip_label);
+ break;
+ }
+ case IR::AbstractSyntaxNode::Type::EndIf:
+ if (current_block) {
+ ctx.OpBranch(node.data.end_if.merge->Definition<Id>());
+ }
+ break;
+ case IR::AbstractSyntaxNode::Type::Repeat: {
+ Id cond{ctx.Def(node.data.repeat.cond)};
+ if (!Settings::values.disable_shader_loop_safety_checks) {
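+                // Guard against infinite loops: a private counter starts at 0x2000 and is
+                // decremented every iteration; once it goes negative the loop is forced to exit.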
+ const Id pointer_type{ctx.TypePointer(spv::StorageClass::Private, ctx.U32[1])};
+ const Id safety_counter{ctx.AddGlobalVariable(
+ pointer_type, spv::StorageClass::Private, ctx.Const(0x2000u))};
+ if (ctx.profile.supported_spirv >= 0x00010400) {
+ ctx.interfaces.push_back(safety_counter);
+ }
+ const Id old_counter{ctx.OpLoad(ctx.U32[1], safety_counter)};
+ const Id new_counter{ctx.OpISub(ctx.U32[1], old_counter, ctx.Const(1u))};
+ ctx.OpStore(safety_counter, new_counter);
+
+ const Id safety_cond{
+ ctx.OpSGreaterThanEqual(ctx.U1, new_counter, ctx.u32_zero_value)};
+ cond = ctx.OpLogicalAnd(ctx.U1, cond, safety_cond);
+ }
+ const Id loop_header_label{node.data.repeat.loop_header->Definition<Id>()};
+ const Id merge_label{node.data.repeat.merge->Definition<Id>()};
+ ctx.OpBranchConditional(cond, loop_header_label, merge_label);
+ break;
+ }
+ case IR::AbstractSyntaxNode::Type::Return:
+ ctx.OpReturn();
+ break;
+ case IR::AbstractSyntaxNode::Type::Unreachable:
+ ctx.OpUnreachable();
+ break;
+ }
+ if (node.type != IR::AbstractSyntaxNode::Type::Block) {
+ current_block = nullptr;
+ }
+ }
+}
+
+Id DefineMain(EmitContext& ctx, IR::Program& program) {
+ const Id void_function{ctx.TypeFunction(ctx.void_id)};
+ const Id main{ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function)};
+ for (IR::Block* const block : program.blocks) {
+ block->SetDefinition(ctx.OpLabel());
+ }
+ Traverse(ctx, program);
+ ctx.OpFunctionEnd();
+ return main;
+}
+
+spv::ExecutionMode ExecutionMode(TessPrimitive primitive) {
+ switch (primitive) {
+ case TessPrimitive::Isolines:
+ return spv::ExecutionMode::Isolines;
+ case TessPrimitive::Triangles:
+ return spv::ExecutionMode::Triangles;
+ case TessPrimitive::Quads:
+ return spv::ExecutionMode::Quads;
+ }
+ throw InvalidArgument("Tessellation primitive {}", primitive);
+}
+
+spv::ExecutionMode ExecutionMode(TessSpacing spacing) {
+ switch (spacing) {
+ case TessSpacing::Equal:
+ return spv::ExecutionMode::SpacingEqual;
+ case TessSpacing::FractionalOdd:
+ return spv::ExecutionMode::SpacingFractionalOdd;
+ case TessSpacing::FractionalEven:
+ return spv::ExecutionMode::SpacingFractionalEven;
+ }
+ throw InvalidArgument("Tessellation spacing {}", spacing);
+}
+
+void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
+ const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size());
+ spv::ExecutionModel execution_model{};
+ switch (program.stage) {
+ case Stage::Compute: {
+ const std::array<u32, 3> workgroup_size{program.workgroup_size};
+ execution_model = spv::ExecutionModel::GLCompute;
+ ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0],
+ workgroup_size[1], workgroup_size[2]);
+ break;
+ }
+ case Stage::VertexB:
+ execution_model = spv::ExecutionModel::Vertex;
+ break;
+ case Stage::TessellationControl:
+ execution_model = spv::ExecutionModel::TessellationControl;
+ ctx.AddCapability(spv::Capability::Tessellation);
+ ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.invocations);
+ break;
+ case Stage::TessellationEval:
+ execution_model = spv::ExecutionModel::TessellationEvaluation;
+ ctx.AddCapability(spv::Capability::Tessellation);
+ ctx.AddExecutionMode(main, ExecutionMode(ctx.runtime_info.tess_primitive));
+ ctx.AddExecutionMode(main, ExecutionMode(ctx.runtime_info.tess_spacing));
+ ctx.AddExecutionMode(main, ctx.runtime_info.tess_clockwise
+ ? spv::ExecutionMode::VertexOrderCw
+ : spv::ExecutionMode::VertexOrderCcw);
+ break;
+ case Stage::Geometry:
+ execution_model = spv::ExecutionModel::Geometry;
+ ctx.AddCapability(spv::Capability::Geometry);
+ ctx.AddCapability(spv::Capability::GeometryStreams);
+ switch (ctx.runtime_info.input_topology) {
+ case InputTopology::Points:
+ ctx.AddExecutionMode(main, spv::ExecutionMode::InputPoints);
+ break;
+ case InputTopology::Lines:
+ ctx.AddExecutionMode(main, spv::ExecutionMode::InputLines);
+ break;
+ case InputTopology::LinesAdjacency:
+ ctx.AddExecutionMode(main, spv::ExecutionMode::InputLinesAdjacency);
+ break;
+ case InputTopology::Triangles:
+ ctx.AddExecutionMode(main, spv::ExecutionMode::Triangles);
+ break;
+ case InputTopology::TrianglesAdjacency:
+ ctx.AddExecutionMode(main, spv::ExecutionMode::InputTrianglesAdjacency);
+ break;
+ }
+ switch (program.output_topology) {
+ case OutputTopology::PointList:
+ ctx.AddExecutionMode(main, spv::ExecutionMode::OutputPoints);
+ break;
+ case OutputTopology::LineStrip:
+ ctx.AddExecutionMode(main, spv::ExecutionMode::OutputLineStrip);
+ break;
+ case OutputTopology::TriangleStrip:
+ ctx.AddExecutionMode(main, spv::ExecutionMode::OutputTriangleStrip);
+ break;
+ }
+ if (program.info.stores[IR::Attribute::PointSize]) {
+ ctx.AddCapability(spv::Capability::GeometryPointSize);
+ }
+ ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.output_vertices);
+ ctx.AddExecutionMode(main, spv::ExecutionMode::Invocations, program.invocations);
+ if (program.is_geometry_passthrough) {
+ if (ctx.profile.support_geometry_shader_passthrough) {
+ ctx.AddExtension("SPV_NV_geometry_shader_passthrough");
+ ctx.AddCapability(spv::Capability::GeometryShaderPassthroughNV);
+ } else {
+ LOG_WARNING(Shader_SPIRV, "Geometry shader passthrough used with no support");
+ }
+ }
+ break;
+ case Stage::Fragment:
+ execution_model = spv::ExecutionModel::Fragment;
+ if (ctx.profile.lower_left_origin_mode) {
+ ctx.AddExecutionMode(main, spv::ExecutionMode::OriginLowerLeft);
+ } else {
+ ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft);
+ }
+ if (program.info.stores_frag_depth) {
+ ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
+ }
+ if (ctx.runtime_info.force_early_z) {
+ ctx.AddExecutionMode(main, spv::ExecutionMode::EarlyFragmentTests);
+ }
+ break;
+ default:
+ throw NotImplementedException("Stage {}", program.stage);
+ }
+ ctx.AddEntryPoint(execution_model, main, "main", interfaces);
+}
+
+void SetupDenormControl(const Profile& profile, const IR::Program& program, EmitContext& ctx,
+ Id main_func) {
+ const Info& info{program.info};
+ if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) {
+ LOG_DEBUG(Shader_SPIRV, "Fp32 denorm flush and preserve on the same shader");
+ } else if (info.uses_fp32_denorms_flush) {
+ if (profile.support_fp32_denorm_flush) {
+ ctx.AddCapability(spv::Capability::DenormFlushToZero);
+ ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 32U);
+ } else {
+ // Drivers will most likely flush denorms by default, no need to warn
+ }
+ } else if (info.uses_fp32_denorms_preserve) {
+ if (profile.support_fp32_denorm_preserve) {
+ ctx.AddCapability(spv::Capability::DenormPreserve);
+ ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 32U);
+ } else {
+ LOG_DEBUG(Shader_SPIRV, "Fp32 denorm preserve used in shader without host support");
+ }
+ }
+ if (!profile.support_separate_denorm_behavior || profile.has_broken_fp16_float_controls) {
+ // No separate denorm behavior
+ return;
+ }
+ if (info.uses_fp16_denorms_flush && info.uses_fp16_denorms_preserve) {
+ LOG_DEBUG(Shader_SPIRV, "Fp16 denorm flush and preserve on the same shader");
+ } else if (info.uses_fp16_denorms_flush) {
+ if (profile.support_fp16_denorm_flush) {
+ ctx.AddCapability(spv::Capability::DenormFlushToZero);
+ ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 16U);
+ } else {
+ // Same as fp32, no need to warn as most drivers will flush by default
+ }
+ } else if (info.uses_fp16_denorms_preserve) {
+ if (profile.support_fp16_denorm_preserve) {
+ ctx.AddCapability(spv::Capability::DenormPreserve);
+ ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U);
+ } else {
+ LOG_DEBUG(Shader_SPIRV, "Fp16 denorm preserve used in shader without host support");
+ }
+ }
+}
+
+void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& program,
+ EmitContext& ctx, Id main_func) {
+ if (profile.has_broken_fp16_float_controls && program.info.uses_fp16) {
+ return;
+ }
+ if (program.info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) {
+ ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
+ ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 16U);
+ }
+ if (profile.support_fp32_signed_zero_nan_preserve) {
+ ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
+ ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 32U);
+ }
+ if (program.info.uses_fp64 && profile.support_fp64_signed_zero_nan_preserve) {
+ ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
+ ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 64U);
+ }
+}
+
+void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ctx) {
+ if (info.uses_sampled_1d) {
+ ctx.AddCapability(spv::Capability::Sampled1D);
+ }
+ if (info.uses_sparse_residency) {
+ ctx.AddCapability(spv::Capability::SparseResidency);
+ }
+ if (info.uses_demote_to_helper_invocation && profile.support_demote_to_helper_invocation) {
+ ctx.AddExtension("SPV_EXT_demote_to_helper_invocation");
+ ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT);
+ }
+ if (info.stores[IR::Attribute::ViewportIndex]) {
+ ctx.AddCapability(spv::Capability::MultiViewport);
+ }
+ if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) {
+ ctx.AddExtension("SPV_NV_viewport_array2");
+ ctx.AddCapability(spv::Capability::ShaderViewportMaskNV);
+ }
+ if (info.stores[IR::Attribute::Layer] || info.stores[IR::Attribute::ViewportIndex]) {
+ if (profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) {
+ ctx.AddExtension("SPV_EXT_shader_viewport_index_layer");
+ ctx.AddCapability(spv::Capability::ShaderViewportIndexLayerEXT);
+ }
+ }
+ if (!profile.support_vertex_instance_id &&
+ (info.loads[IR::Attribute::InstanceId] || info.loads[IR::Attribute::VertexId])) {
+ ctx.AddExtension("SPV_KHR_shader_draw_parameters");
+ ctx.AddCapability(spv::Capability::DrawParameters);
+ }
+ if ((info.uses_subgroup_vote || info.uses_subgroup_invocation_id ||
+ info.uses_subgroup_shuffles) &&
+ profile.support_vote) {
+ ctx.AddExtension("SPV_KHR_shader_ballot");
+ ctx.AddCapability(spv::Capability::SubgroupBallotKHR);
+ if (!profile.warp_size_potentially_larger_than_guest) {
+ // vote ops are only used when not taking the long path
+ ctx.AddExtension("SPV_KHR_subgroup_vote");
+ ctx.AddCapability(spv::Capability::SubgroupVoteKHR);
+ }
+ }
+ if (info.uses_int64_bit_atomics && profile.support_int64_atomics) {
+ ctx.AddCapability(spv::Capability::Int64Atomics);
+ }
+ if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) {
+ ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat);
+ }
+ if (info.uses_typeless_image_writes) {
+ ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat);
+ }
+ if (info.uses_image_buffers) {
+ ctx.AddCapability(spv::Capability::ImageBuffer);
+ }
+ if (info.uses_sample_id) {
+ ctx.AddCapability(spv::Capability::SampleRateShading);
+ }
+ if (!ctx.runtime_info.xfb_varyings.empty()) {
+ ctx.AddCapability(spv::Capability::TransformFeedback);
+ }
+ if (info.uses_derivatives) {
+ ctx.AddCapability(spv::Capability::DerivativeControl);
+ }
+ // TODO: Track this usage
+ ctx.AddCapability(spv::Capability::ImageGatherExtended);
+ ctx.AddCapability(spv::Capability::ImageQuery);
+ ctx.AddCapability(spv::Capability::SampledBuffer);
+}
+
+void PatchPhiNodes(IR::Program& program, EmitContext& ctx) {
+ auto inst{program.blocks.front()->begin()};
+ size_t block_index{0};
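+    // Resolve deferred phi operands, walking blocks in emission order; a zero argument index
+    // advances to the next phi instruction.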
+ ctx.PatchDeferredPhi([&](size_t phi_arg) {
+ if (phi_arg == 0) {
+ ++inst;
+ if (inst == program.blocks[block_index]->end() ||
+ inst->GetOpcode() != IR::Opcode::Phi) {
+ do {
+ ++block_index;
+ inst = program.blocks[block_index]->begin();
+ } while (inst->GetOpcode() != IR::Opcode::Phi);
+ }
+ }
+ return ctx.Def(inst->Arg(phi_arg));
+ });
+}
+} // Anonymous namespace
+
+std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info,
+ IR::Program& program, Bindings& bindings) {
+ EmitContext ctx{profile, runtime_info, program, bindings};
+ const Id main{DefineMain(ctx, program)};
+ DefineEntryPoint(program, ctx, main);
+ if (profile.support_float_controls) {
+ ctx.AddExtension("SPV_KHR_float_controls");
+ SetupDenormControl(profile, program, ctx, main);
+ SetupSignedNanCapabilities(profile, program, ctx, main);
+ }
+ SetupCapabilities(profile, program.info, ctx);
+ PatchPhiNodes(program, ctx);
+ return ctx.Assemble();
+}
+
+Id EmitPhi(EmitContext& ctx, IR::Inst* inst) {
+ const size_t num_args{inst->NumArgs()};
+ boost::container::small_vector<Id, 32> blocks;
+ blocks.reserve(num_args);
+ for (size_t index = 0; index < num_args; ++index) {
+ blocks.push_back(inst->PhiBlock(index)->Definition<Id>());
+ }
+ // The type of a phi instruction is stored in its flags
+ const Id result_type{TypeId(ctx, inst->Flags<IR::Type>())};
+ return ctx.DeferredOpPhi(result_type, std::span(blocks.data(), blocks.size()));
+}
+
+void EmitVoid(EmitContext&) {}
+
+Id EmitIdentity(EmitContext& ctx, const IR::Value& value) {
+ const Id id{ctx.Def(value)};
+ if (!Sirit::ValidId(id)) {
+ throw NotImplementedException("Forward identity declaration");
+ }
+ return id;
+}
+
+Id EmitConditionRef(EmitContext& ctx, const IR::Value& value) {
+ const Id id{ctx.Def(value)};
+ if (!Sirit::ValidId(id)) {
+ throw NotImplementedException("Forward identity declaration");
+ }
+ return id;
+}
+
+void EmitReference(EmitContext&) {}
+
+void EmitPhiMove(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitGetZeroFromOp(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitGetSignFromOp(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitGetCarryFromOp(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitGetOverflowFromOp(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitGetSparseFromOp(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitGetInBoundsFromOp(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
new file mode 100644
index 000000000..db0c935fe
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -0,0 +1,27 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+
+#include <sirit/sirit.h>
+
+#include "common/common_types.h"
+#include "shader_recompiler/backend/bindings.h"
+#include "shader_recompiler/backend/spirv/emit_context.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/profile.h"
+
+namespace Shader::Backend::SPIRV {
+
+[[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info,
+ IR::Program& program, Bindings& bindings);
+
+[[nodiscard]] inline std::vector<u32> EmitSPIRV(const Profile& profile, IR::Program& program) {
+ Bindings binding;
+ return EmitSPIRV(profile, {}, program, binding);
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
new file mode 100644
index 000000000..9af8bb9e1
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
@@ -0,0 +1,448 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
+Id SharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0) {
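+    // Shared memory offsets are in bytes; shift right by two to index 32-bit words.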
+ const Id shift_id{ctx.Const(2U)};
+ Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+ if (index_offset > 0) {
+ index = ctx.OpIAdd(ctx.U32[1], index, ctx.Const(index_offset));
+ }
+ return ctx.profile.support_explicit_workgroup_layout
+ ? ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index)
+ : ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index);
+}
+
+Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) {
+ if (offset.IsImmediate()) {
+ const u32 imm_offset{static_cast<u32>(offset.U32() / element_size)};
+ return ctx.Const(imm_offset);
+ }
+ const u32 shift{static_cast<u32>(std::countr_zero(element_size))};
+ const Id index{ctx.Def(offset)};
+ if (shift == 0) {
+ return index;
+ }
+ const Id shift_id{ctx.Const(shift)};
+ return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id);
+}
+
+Id StoragePointer(EmitContext& ctx, const StorageTypeDefinition& type_def,
+ Id StorageDefinitions::*member_ptr, const IR::Value& binding,
+ const IR::Value& offset, size_t element_size) {
+ if (!binding.IsImmediate()) {
+ throw NotImplementedException("Dynamic storage buffer indexing");
+ }
+ const Id ssbo{ctx.ssbos[binding.U32()].*member_ptr};
+ const Id index{StorageIndex(ctx, offset, element_size)};
+ return ctx.OpAccessChain(type_def.element, ssbo, ctx.u32_zero_value, index);
+}
+
+std::pair<Id, Id> AtomicArgs(EmitContext& ctx) {
+ const Id scope{ctx.Const(static_cast<u32>(spv::Scope::Device))};
+ const Id semantics{ctx.u32_zero_value};
+ return {scope, semantics};
+}
+
+Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value,
+ Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
+ const Id pointer{SharedPointer(ctx, offset)};
+ const auto [scope, semantics]{AtomicArgs(ctx)};
+ return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
+}
+
+Id StorageAtomicU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value,
+ Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
+ const Id pointer{StoragePointer(ctx, ctx.storage_types.U32, &StorageDefinitions::U32, binding,
+ offset, sizeof(u32))};
+ const auto [scope, semantics]{AtomicArgs(ctx)};
+ return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
+}
+
+Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value,
+ Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id),
+ Id (Sirit::Module::*non_atomic_func)(Id, Id, Id)) {
+ if (ctx.profile.support_int64_atomics) {
+ const Id pointer{StoragePointer(ctx, ctx.storage_types.U64, &StorageDefinitions::U64,
+ binding, offset, sizeof(u64))};
+ const auto [scope, semantics]{AtomicArgs(ctx)};
+ return (ctx.*atomic_func)(ctx.U64, pointer, scope, semantics, value);
+ }
+ LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
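+    // Non-atomic fallback: load both words, apply the operation and store the result.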
+ const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2,
+ binding, offset, sizeof(u32[2]))};
+ const Id original_value{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))};
+ const Id result{(ctx.*non_atomic_func)(ctx.U64, value, original_value)};
+ ctx.OpStore(pointer, ctx.OpBitcast(ctx.U32[2], result));
+ return original_value;
+}
+} // Anonymous namespace
+
+Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value) {
+ return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicIAdd);
+}
+
+Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value) {
+ return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMin);
+}
+
+Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value) {
+ return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMin);
+}
+
+Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value) {
+ return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMax);
+}
+
+Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value) {
+ return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMax);
+}
+
+Id EmitSharedAtomicInc32(EmitContext& ctx, Id offset, Id value) {
+ const Id shift_id{ctx.Const(2U)};
+ const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+ return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_shared, index, value);
+}
+
+Id EmitSharedAtomicDec32(EmitContext& ctx, Id offset, Id value) {
+ const Id shift_id{ctx.Const(2U)};
+ const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+ return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_shared, index, value);
+}
+
+Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value) {
+ return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicAnd);
+}
+
+Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value) {
+ return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicOr);
+}
+
+Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value) {
+ return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicXor);
+}
+
+Id EmitSharedAtomicExchange32(EmitContext& ctx, Id offset, Id value) {
+ return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicExchange);
+}
+
+Id EmitSharedAtomicExchange64(EmitContext& ctx, Id offset, Id value) {
+ if (ctx.profile.support_int64_atomics && ctx.profile.support_explicit_workgroup_layout) {
+ const Id shift_id{ctx.Const(3U)};
+ const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+ const Id pointer{
+ ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
+ const auto [scope, semantics]{AtomicArgs(ctx)};
+ return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value);
+ }
+ LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
+ const Id pointer_1{SharedPointer(ctx, offset, 0)};
+ const Id pointer_2{SharedPointer(ctx, offset, 1)};
+ const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)};
+ const Id value_2{ctx.OpLoad(ctx.U32[1], pointer_2)};
+ const Id new_vector{ctx.OpBitcast(ctx.U32[2], value)};
+ ctx.OpStore(pointer_1, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 0U));
+ ctx.OpStore(pointer_2, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 1U));
+ return ctx.OpBitcast(ctx.U64, ctx.OpCompositeConstruct(ctx.U32[2], value_1, value_2));
+}
+
+Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicIAdd);
+}
+
+Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMin);
+}
+
+Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMin);
+}
+
+Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMax);
+}
+
+Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMax);
+}
+
+Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ const Id ssbo{ctx.ssbos[binding.U32()].U32};
+ const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
+ return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_ssbo, base_index, value, ssbo);
+}
+
+Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ const Id ssbo{ctx.ssbos[binding.U32()].U32};
+ const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
+ return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_ssbo, base_index, value, ssbo);
+}
+
+Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicAnd);
+}
+
+Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicOr);
+}
+
+Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicXor);
+}
+
+Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicExchange);
+}
+
+Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicIAdd,
+ &Sirit::Module::OpIAdd);
+}
+
+Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMin,
+ &Sirit::Module::OpSMin);
+}
+
+Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMin,
+ &Sirit::Module::OpUMin);
+}
+
+Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMax,
+ &Sirit::Module::OpSMax);
+}
+
+Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMax,
+ &Sirit::Module::OpUMax);
+}
+
+Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicAnd,
+ &Sirit::Module::OpBitwiseAnd);
+}
+
+Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicOr,
+ &Sirit::Module::OpBitwiseOr);
+}
+
+Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicXor,
+ &Sirit::Module::OpBitwiseXor);
+}
+
+Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ if (ctx.profile.support_int64_atomics) {
+ const Id pointer{StoragePointer(ctx, ctx.storage_types.U64, &StorageDefinitions::U64,
+ binding, offset, sizeof(u64))};
+ const auto [scope, semantics]{AtomicArgs(ctx)};
+ return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value);
+ }
+ LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
+ const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2,
+ binding, offset, sizeof(u32[2]))};
+ const Id original{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))};
+ ctx.OpStore(pointer, value);
+ return original;
+}
+
+Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ const Id ssbo{ctx.ssbos[binding.U32()].U32};
+ const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
+ return ctx.OpFunctionCall(ctx.F32[1], ctx.f32_add_cas, base_index, value, ssbo);
+}
+
+Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ const Id ssbo{ctx.ssbos[binding.U32()].U32};
+ const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
+ const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_add_cas, base_index, value, ssbo)};
+ return ctx.OpBitcast(ctx.U32[1], result);
+}
+
+Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ const Id ssbo{ctx.ssbos[binding.U32()].U32};
+ const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
+ const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_add_cas, base_index, value, ssbo)};
+ return ctx.OpPackHalf2x16(ctx.U32[1], result);
+}
+
+Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ const Id ssbo{ctx.ssbos[binding.U32()].U32};
+ const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
+ const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_min_cas, base_index, value, ssbo)};
+ return ctx.OpBitcast(ctx.U32[1], result);
+}
+
+Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ const Id ssbo{ctx.ssbos[binding.U32()].U32};
+ const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
+ const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_min_cas, base_index, value, ssbo)};
+ return ctx.OpPackHalf2x16(ctx.U32[1], result);
+}
+
+Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ const Id ssbo{ctx.ssbos[binding.U32()].U32};
+ const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
+ const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_max_cas, base_index, value, ssbo)};
+ return ctx.OpBitcast(ctx.U32[1], result);
+}
+
+Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ const Id ssbo{ctx.ssbos[binding.U32()].U32};
+ const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
+ const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_max_cas, base_index, value, ssbo)};
+ return ctx.OpPackHalf2x16(ctx.U32[1], result);
+}
+
+Id EmitGlobalAtomicIAdd32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicSMin32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicUMin32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicSMax32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicUMax32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicInc32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicDec32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicAnd32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicOr32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicXor32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicExchange32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicIAdd64(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicSMin64(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicUMin64(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicSMax64(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicUMax64(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicInc64(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicDec64(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicAnd64(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicOr64(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicXor64(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicExchange64(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicAddF32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicAddF16x2(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicAddF32x2(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicMinF16x2(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicMinF32x2(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicMaxF16x2(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicMaxF32x2(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp
new file mode 100644
index 000000000..e0b52a001
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp
@@ -0,0 +1,38 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
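+// Emits an OpMemoryBarrier at the given scope with acquire/release ordering over
+// uniform, workgroup, atomic counter and image memory.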
+void MemoryBarrier(EmitContext& ctx, spv::Scope scope) {
+ const auto semantics{
+ spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory |
+ spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AtomicCounterMemory |
+ spv::MemorySemanticsMask::ImageMemory};
+ ctx.OpMemoryBarrier(ctx.Const(static_cast<u32>(scope)), ctx.Const(static_cast<u32>(semantics)));
+}
+} // Anonymous namespace
+
+void EmitBarrier(EmitContext& ctx) {
+ const auto execution{spv::Scope::Workgroup};
+ const auto memory{spv::Scope::Workgroup};
+ const auto memory_semantics{spv::MemorySemanticsMask::AcquireRelease |
+ spv::MemorySemanticsMask::WorkgroupMemory};
+ ctx.OpControlBarrier(ctx.Const(static_cast<u32>(execution)),
+ ctx.Const(static_cast<u32>(memory)),
+ ctx.Const(static_cast<u32>(memory_semantics)));
+}
+
+void EmitWorkgroupMemoryBarrier(EmitContext& ctx) {
+ MemoryBarrier(ctx, spv::Scope::Workgroup);
+}
+
+void EmitDeviceMemoryBarrier(EmitContext& ctx) {
+ MemoryBarrier(ctx, spv::Scope::Device);
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
new file mode 100644
index 000000000..bb11f4f4e
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
@@ -0,0 +1,66 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+
+namespace Shader::Backend::SPIRV {
+
+void EmitBitCastU16F16(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBitCastU32F32(EmitContext& ctx, Id value) {
+ return ctx.OpBitcast(ctx.U32[1], value);
+}
+
+void EmitBitCastU64F64(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitBitCastF16U16(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBitCastF32U32(EmitContext& ctx, Id value) {
+ return ctx.OpBitcast(ctx.F32[1], value);
+}
+
+void EmitBitCastF64U64(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitPackUint2x32(EmitContext& ctx, Id value) {
+ return ctx.OpBitcast(ctx.U64, value);
+}
+
+Id EmitUnpackUint2x32(EmitContext& ctx, Id value) {
+ return ctx.OpBitcast(ctx.U32[2], value);
+}
+
+Id EmitPackFloat2x16(EmitContext& ctx, Id value) {
+ return ctx.OpBitcast(ctx.U32[1], value);
+}
+
+Id EmitUnpackFloat2x16(EmitContext& ctx, Id value) {
+ return ctx.OpBitcast(ctx.F16[2], value);
+}
+
+Id EmitPackHalf2x16(EmitContext& ctx, Id value) {
+ return ctx.OpPackHalf2x16(ctx.U32[1], value);
+}
+
+Id EmitUnpackHalf2x16(EmitContext& ctx, Id value) {
+ return ctx.OpUnpackHalf2x16(ctx.F32[2], value);
+}
+
+Id EmitPackDouble2x32(EmitContext& ctx, Id value) {
+ return ctx.OpBitcast(ctx.F64[1], value);
+}
+
+Id EmitUnpackDouble2x32(EmitContext& ctx, Id value) {
+ return ctx.OpBitcast(ctx.U32[2], value);
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
new file mode 100644
index 000000000..10ff4ecab
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
@@ -0,0 +1,155 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+
+namespace Shader::Backend::SPIRV {
+
+Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2) {
+ return ctx.OpCompositeConstruct(ctx.U32[2], e1, e2);
+}
+
+Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3) {
+ return ctx.OpCompositeConstruct(ctx.U32[3], e1, e2, e3);
+}
+
+Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) {
+ return ctx.OpCompositeConstruct(ctx.U32[4], e1, e2, e3, e4);
+}
+
+Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index) {
+ return ctx.OpCompositeExtract(ctx.U32[1], composite, index);
+}
+
+Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index) {
+ return ctx.OpCompositeExtract(ctx.U32[1], composite, index);
+}
+
+Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index) {
+ return ctx.OpCompositeExtract(ctx.U32[1], composite, index);
+}
+
+Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index) {
+ return ctx.OpCompositeInsert(ctx.U32[2], object, composite, index);
+}
+
+Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index) {
+ return ctx.OpCompositeInsert(ctx.U32[3], object, composite, index);
+}
+
+Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index) {
+ return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index);
+}
+
+Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2) {
+ return ctx.OpCompositeConstruct(ctx.F16[2], e1, e2);
+}
+
+Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3) {
+ return ctx.OpCompositeConstruct(ctx.F16[3], e1, e2, e3);
+}
+
+Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) {
+ return ctx.OpCompositeConstruct(ctx.F16[4], e1, e2, e3, e4);
+}
+
+Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index) {
+ return ctx.OpCompositeExtract(ctx.F16[1], composite, index);
+}
+
+Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index) {
+ return ctx.OpCompositeExtract(ctx.F16[1], composite, index);
+}
+
+Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index) {
+ return ctx.OpCompositeExtract(ctx.F16[1], composite, index);
+}
+
+Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index) {
+ return ctx.OpCompositeInsert(ctx.F16[2], object, composite, index);
+}
+
+Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index) {
+ return ctx.OpCompositeInsert(ctx.F16[3], object, composite, index);
+}
+
+Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index) {
+ return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index);
+}
+
+Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2) {
+ return ctx.OpCompositeConstruct(ctx.F32[2], e1, e2);
+}
+
+Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3) {
+ return ctx.OpCompositeConstruct(ctx.F32[3], e1, e2, e3);
+}
+
+Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) {
+ return ctx.OpCompositeConstruct(ctx.F32[4], e1, e2, e3, e4);
+}
+
+Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index) {
+ return ctx.OpCompositeExtract(ctx.F32[1], composite, index);
+}
+
+Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index) {
+ return ctx.OpCompositeExtract(ctx.F32[1], composite, index);
+}
+
+Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index) {
+ return ctx.OpCompositeExtract(ctx.F32[1], composite, index);
+}
+
+Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index) {
+ return ctx.OpCompositeInsert(ctx.F32[2], object, composite, index);
+}
+
+Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index) {
+ return ctx.OpCompositeInsert(ctx.F32[3], object, composite, index);
+}
+
+Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index) {
+ return ctx.OpCompositeInsert(ctx.F32[4], object, composite, index);
+}
+
+void EmitCompositeConstructF64x2(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitCompositeConstructF64x3(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitCompositeConstructF64x4(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitCompositeExtractF64x2(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitCompositeExtractF64x3(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitCompositeExtractF64x4(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index) {
+ return ctx.OpCompositeInsert(ctx.F64[2], object, composite, index);
+}
+
+Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index) {
+ return ctx.OpCompositeInsert(ctx.F64[3], object, composite, index);
+}
+
+Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index) {
+ return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index);
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
new file mode 100644
index 000000000..fb8c02a77
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -0,0 +1,505 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <tuple>
+#include <utility>
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
+struct AttrInfo {
+ Id pointer;
+ Id id;
+ bool needs_cast;
+};
+
+std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) {
+ const AttributeType type{ctx.runtime_info.generic_input_types.at(index)};
+ switch (type) {
+ case AttributeType::Float:
+ return AttrInfo{ctx.input_f32, ctx.F32[1], false};
+ case AttributeType::UnsignedInt:
+ return AttrInfo{ctx.input_u32, ctx.U32[1], true};
+ case AttributeType::SignedInt:
+ return AttrInfo{ctx.input_s32, ctx.TypeInt(32, true), true};
+ case AttributeType::Disabled:
+ return std::nullopt;
+ }
+ throw InvalidArgument("Invalid attribute type {}", type);
+}
+
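+// Builds an access chain for an input attribute. Stages with arrayed inputs
+// (tessellation and geometry) index by vertex first; other stages ignore the vertex.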
+template <typename... Args>
+Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&... args) {
+ switch (ctx.stage) {
+ case Stage::TessellationControl:
+ case Stage::TessellationEval:
+ case Stage::Geometry:
+ return ctx.OpAccessChain(pointer_type, base, vertex, std::forward<Args>(args)...);
+ default:
+ return ctx.OpAccessChain(pointer_type, base, std::forward<Args>(args)...);
+ }
+}
+
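+// Builds an access chain for an output. Tessellation control outputs are arrayed
+// per invocation, so the chain is prefixed with the current invocation id.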
+template <typename... Args>
+Id OutputAccessChain(EmitContext& ctx, Id result_type, Id base, Args&&... args) {
+ if (ctx.stage == Stage::TessellationControl) {
+ const Id invocation_id{ctx.OpLoad(ctx.U32[1], ctx.invocation_id)};
+ return ctx.OpAccessChain(result_type, base, invocation_id, std::forward<Args>(args)...);
+ } else {
+ return ctx.OpAccessChain(result_type, base, std::forward<Args>(args)...);
+ }
+}
+
+struct OutAttr {
+ OutAttr(Id pointer_) : pointer{pointer_} {}
+ OutAttr(Id pointer_, Id type_) : pointer{pointer_}, type{type_} {}
+
+ Id pointer{};
+ Id type{};
+};
+
+std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
+ if (IR::IsGeneric(attr)) {
+ const u32 index{IR::GenericAttributeIndex(attr)};
+ const u32 element{IR::GenericAttributeElement(attr)};
+ const GenericElementInfo& info{ctx.output_generics.at(index).at(element)};
+ if (info.num_components == 1) {
+ return info.id;
+ } else {
+ const u32 index_element{element - info.first_element};
+ const Id index_id{ctx.Const(index_element)};
+ return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id);
+ }
+ }
+ switch (attr) {
+ case IR::Attribute::PointSize:
+ return ctx.output_point_size;
+ case IR::Attribute::PositionX:
+ case IR::Attribute::PositionY:
+ case IR::Attribute::PositionZ:
+ case IR::Attribute::PositionW: {
+ const u32 element{static_cast<u32>(attr) % 4};
+ const Id element_id{ctx.Const(element)};
+ return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id);
+ }
+ case IR::Attribute::ClipDistance0:
+ case IR::Attribute::ClipDistance1:
+ case IR::Attribute::ClipDistance2:
+ case IR::Attribute::ClipDistance3:
+ case IR::Attribute::ClipDistance4:
+ case IR::Attribute::ClipDistance5:
+ case IR::Attribute::ClipDistance6:
+ case IR::Attribute::ClipDistance7: {
+ const u32 base{static_cast<u32>(IR::Attribute::ClipDistance0)};
+ const u32 index{static_cast<u32>(attr) - base};
+ const Id clip_num{ctx.Const(index)};
+ return OutputAccessChain(ctx, ctx.output_f32, ctx.clip_distances, clip_num);
+ }
+ case IR::Attribute::Layer:
+ if (ctx.profile.support_viewport_index_layer_non_geometry ||
+ ctx.stage == Shader::Stage::Geometry) {
+ return OutAttr{ctx.layer, ctx.U32[1]};
+ }
+ return std::nullopt;
+ case IR::Attribute::ViewportIndex:
+ if (ctx.profile.support_viewport_index_layer_non_geometry ||
+ ctx.stage == Shader::Stage::Geometry) {
+ return OutAttr{ctx.viewport_index, ctx.U32[1]};
+ }
+ return std::nullopt;
+ case IR::Attribute::ViewportMask:
+ if (!ctx.profile.support_viewport_mask) {
+ return std::nullopt;
+ }
+ return OutAttr{ctx.OpAccessChain(ctx.output_u32, ctx.viewport_mask, ctx.u32_zero_value),
+ ctx.U32[1]};
+ default:
+        throw NotImplementedException("Write attribute {}", attr);
+ }
+}
+
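+// Loads one element from a constant buffer through the uniform definition selected
+// by member_ptr, scaling immediate offsets down to element-sized indices.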
+Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, u32 element_size,
+ const IR::Value& binding, const IR::Value& offset) {
+ if (!binding.IsImmediate()) {
+ throw NotImplementedException("Constant buffer indexing");
+ }
+ const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr};
+ const Id uniform_type{ctx.uniform_types.*member_ptr};
+ if (!offset.IsImmediate()) {
+ Id index{ctx.Def(offset)};
+ if (element_size > 1) {
+ const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))};
+ const Id shift{ctx.Const(log2_element_size)};
+ index = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift);
+ }
+ const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, index)};
+ return ctx.OpLoad(result_type, access_chain);
+ }
+    // Hardware has been proven to read the aligned offset (e.g. LDC.U32 at 6 will read offset 4)
+ const Id imm_offset{ctx.Const(offset.U32() / element_size)};
+ const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, imm_offset)};
+ return ctx.OpLoad(result_type, access_chain);
+}
+
+Id GetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+ return GetCbuf(ctx, ctx.U32[1], &UniformDefinitions::U32, sizeof(u32), binding, offset);
+}
+
+Id GetCbufU32x4(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+ return GetCbuf(ctx, ctx.U32[4], &UniformDefinitions::U32x4, sizeof(u32[4]), binding, offset);
+}
+
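+// Extracts a single 32-bit element from a vec4 constant buffer load, used when
+// descriptor aliasing is not supported and every access reads a full uvec4.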
+Id GetCbufElement(EmitContext& ctx, Id vector, const IR::Value& offset, u32 index_offset) {
+ if (offset.IsImmediate()) {
+ const u32 element{(offset.U32() / 4) % 4 + index_offset};
+ return ctx.OpCompositeExtract(ctx.U32[1], vector, element);
+ }
+ const Id shift{ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), ctx.Const(2u))};
+ Id element{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Const(3u))};
+ if (index_offset > 0) {
+ element = ctx.OpIAdd(ctx.U32[1], element, ctx.Const(index_offset));
+ }
+ return ctx.OpVectorExtractDynamic(ctx.U32[1], vector, element);
+}
+} // Anonymous namespace
+
+void EmitGetRegister(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitSetRegister(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitGetPred(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitSetPred(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitSetGotoVariable(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitGetGotoVariable(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitSetIndirectBranchVariable(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitGetIndirectBranchVariable(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+ if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int8) {
+ const Id load{GetCbuf(ctx, ctx.U8, &UniformDefinitions::U8, sizeof(u8), binding, offset)};
+ return ctx.OpUConvert(ctx.U32[1], load);
+ }
+ Id element{};
+ if (ctx.profile.support_descriptor_aliasing) {
+ element = GetCbufU32(ctx, binding, offset);
+ } else {
+ const Id vector{GetCbufU32x4(ctx, binding, offset)};
+ element = GetCbufElement(ctx, vector, offset, 0u);
+ }
+ const Id bit_offset{ctx.BitOffset8(offset)};
+ return ctx.OpBitFieldUExtract(ctx.U32[1], element, bit_offset, ctx.Const(8u));
+}
+
+Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+ if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int8) {
+ const Id load{GetCbuf(ctx, ctx.S8, &UniformDefinitions::S8, sizeof(s8), binding, offset)};
+ return ctx.OpSConvert(ctx.U32[1], load);
+ }
+ Id element{};
+ if (ctx.profile.support_descriptor_aliasing) {
+ element = GetCbufU32(ctx, binding, offset);
+ } else {
+ const Id vector{GetCbufU32x4(ctx, binding, offset)};
+ element = GetCbufElement(ctx, vector, offset, 0u);
+ }
+ const Id bit_offset{ctx.BitOffset8(offset)};
+ return ctx.OpBitFieldSExtract(ctx.U32[1], element, bit_offset, ctx.Const(8u));
+}
+
+Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+ if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int16) {
+ const Id load{
+ GetCbuf(ctx, ctx.U16, &UniformDefinitions::U16, sizeof(u16), binding, offset)};
+ return ctx.OpUConvert(ctx.U32[1], load);
+ }
+ Id element{};
+ if (ctx.profile.support_descriptor_aliasing) {
+ element = GetCbufU32(ctx, binding, offset);
+ } else {
+ const Id vector{GetCbufU32x4(ctx, binding, offset)};
+ element = GetCbufElement(ctx, vector, offset, 0u);
+ }
+ const Id bit_offset{ctx.BitOffset16(offset)};
+ return ctx.OpBitFieldUExtract(ctx.U32[1], element, bit_offset, ctx.Const(16u));
+}
+
+Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+ if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int16) {
+ const Id load{
+ GetCbuf(ctx, ctx.S16, &UniformDefinitions::S16, sizeof(s16), binding, offset)};
+ return ctx.OpSConvert(ctx.U32[1], load);
+ }
+ Id element{};
+ if (ctx.profile.support_descriptor_aliasing) {
+ element = GetCbufU32(ctx, binding, offset);
+ } else {
+ const Id vector{GetCbufU32x4(ctx, binding, offset)};
+ element = GetCbufElement(ctx, vector, offset, 0u);
+ }
+ const Id bit_offset{ctx.BitOffset16(offset)};
+ return ctx.OpBitFieldSExtract(ctx.U32[1], element, bit_offset, ctx.Const(16u));
+}
+
+Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+ if (ctx.profile.support_descriptor_aliasing) {
+ return GetCbufU32(ctx, binding, offset);
+ } else {
+ const Id vector{GetCbufU32x4(ctx, binding, offset)};
+ return GetCbufElement(ctx, vector, offset, 0u);
+ }
+}
+
+Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+ if (ctx.profile.support_descriptor_aliasing) {
+ return GetCbuf(ctx, ctx.F32[1], &UniformDefinitions::F32, sizeof(f32), binding, offset);
+ } else {
+ const Id vector{GetCbufU32x4(ctx, binding, offset)};
+ return ctx.OpBitcast(ctx.F32[1], GetCbufElement(ctx, vector, offset, 0u));
+ }
+}
+
+Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+ if (ctx.profile.support_descriptor_aliasing) {
+ return GetCbuf(ctx, ctx.U32[2], &UniformDefinitions::U32x2, sizeof(u32[2]), binding,
+ offset);
+ } else {
+ const Id vector{GetCbufU32x4(ctx, binding, offset)};
+ return ctx.OpCompositeConstruct(ctx.U32[2], GetCbufElement(ctx, vector, offset, 0u),
+ GetCbufElement(ctx, vector, offset, 1u));
+ }
+}
+
+Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
+ const u32 element{static_cast<u32>(attr) % 4};
+ if (IR::IsGeneric(attr)) {
+ const u32 index{IR::GenericAttributeIndex(attr)};
+ const std::optional<AttrInfo> type{AttrTypes(ctx, index)};
+ if (!type) {
+ // Attribute is disabled
+ return ctx.Const(element == 3 ? 1.0f : 0.0f);
+ }
+ if (!ctx.runtime_info.previous_stage_stores.Generic(index, element)) {
+ // Varying component is not written
+ return ctx.Const(type && element == 3 ? 1.0f : 0.0f);
+ }
+ const Id generic_id{ctx.input_generics.at(index)};
+ const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, ctx.Const(element))};
+ const Id value{ctx.OpLoad(type->id, pointer)};
+ return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value;
+ }
+ switch (attr) {
+ case IR::Attribute::PrimitiveId:
+ return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.primitive_id));
+ case IR::Attribute::PositionX:
+ case IR::Attribute::PositionY:
+ case IR::Attribute::PositionZ:
+ case IR::Attribute::PositionW:
+ return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position,
+ ctx.Const(element)));
+ case IR::Attribute::InstanceId:
+ if (ctx.profile.support_vertex_instance_id) {
+ return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id));
+ } else {
+ const Id index{ctx.OpLoad(ctx.U32[1], ctx.instance_index)};
+ const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_instance)};
+ return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base));
+ }
+ case IR::Attribute::VertexId:
+ if (ctx.profile.support_vertex_instance_id) {
+ return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.vertex_id));
+ } else {
+ const Id index{ctx.OpLoad(ctx.U32[1], ctx.vertex_index)};
+ const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)};
+ return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base));
+ }
+ case IR::Attribute::FrontFace:
+ return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1, ctx.front_face),
+ ctx.Const(std::numeric_limits<u32>::max()), ctx.u32_zero_value);
+ case IR::Attribute::PointSpriteS:
+ return ctx.OpLoad(ctx.F32[1],
+ ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.u32_zero_value));
+ case IR::Attribute::PointSpriteT:
+ return ctx.OpLoad(ctx.F32[1],
+ ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.Const(1U)));
+ case IR::Attribute::TessellationEvaluationPointU:
+ return ctx.OpLoad(ctx.F32[1],
+ ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value));
+ case IR::Attribute::TessellationEvaluationPointV:
+ return ctx.OpLoad(ctx.F32[1],
+ ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.Const(1U)));
+ default:
+ throw NotImplementedException("Read attribute {}", attr);
+ }
+}
+
+void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, [[maybe_unused]] Id vertex) {
+ const std::optional<OutAttr> output{OutputAttrPointer(ctx, attr)};
+ if (!output) {
+ return;
+ }
+ if (Sirit::ValidId(output->type)) {
+ value = ctx.OpBitcast(output->type, value);
+ }
+ ctx.OpStore(output->pointer, value);
+}
+
+Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex) {
+ switch (ctx.stage) {
+ case Stage::TessellationControl:
+ case Stage::TessellationEval:
+ case Stage::Geometry:
+ return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset, vertex);
+ default:
+ return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset);
+ }
+}
+
+void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, [[maybe_unused]] Id vertex) {
+ ctx.OpFunctionCall(ctx.void_id, ctx.indexed_store_func, offset, value);
+}
+
+Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) {
+ if (!IR::IsGeneric(patch)) {
+ throw NotImplementedException("Non-generic patch load");
+ }
+ const u32 index{IR::GenericPatchIndex(patch)};
+ const Id element{ctx.Const(IR::GenericPatchElement(patch))};
+ const Id type{ctx.stage == Stage::TessellationControl ? ctx.output_f32 : ctx.input_f32};
+ const Id pointer{ctx.OpAccessChain(type, ctx.patches.at(index), element)};
+ return ctx.OpLoad(ctx.F32[1], pointer);
+}
+
+void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
+ const Id pointer{[&] {
+ if (IR::IsGeneric(patch)) {
+ const u32 index{IR::GenericPatchIndex(patch)};
+ const Id element{ctx.Const(IR::GenericPatchElement(patch))};
+ return ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(index), element);
+ }
+ switch (patch) {
+ case IR::Patch::TessellationLodLeft:
+ case IR::Patch::TessellationLodRight:
+ case IR::Patch::TessellationLodTop:
+ case IR::Patch::TessellationLodBottom: {
+ const u32 index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)};
+ const Id index_id{ctx.Const(index)};
+ return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_outer, index_id);
+ }
+ case IR::Patch::TessellationLodInteriorU:
+ return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner,
+ ctx.u32_zero_value);
+ case IR::Patch::TessellationLodInteriorV:
+ return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, ctx.Const(1u));
+ default:
+ throw NotImplementedException("Patch {}", patch);
+ }
+ }()};
+ ctx.OpStore(pointer, value);
+}
+
+void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) {
+ const Id component_id{ctx.Const(component)};
+ const Id pointer{ctx.OpAccessChain(ctx.output_f32, ctx.frag_color.at(index), component_id)};
+ ctx.OpStore(pointer, value);
+}
+
+void EmitSetSampleMask(EmitContext& ctx, Id value) {
+ ctx.OpStore(ctx.sample_mask, value);
+}
+
+void EmitSetFragDepth(EmitContext& ctx, Id value) {
+ ctx.OpStore(ctx.frag_depth, value);
+}
+
+void EmitGetZFlag(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitGetSFlag(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitGetCFlag(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitGetOFlag(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitSetZFlag(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitSetSFlag(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitSetCFlag(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitSetOFlag(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitWorkgroupId(EmitContext& ctx) {
+ return ctx.OpLoad(ctx.U32[3], ctx.workgroup_id);
+}
+
+Id EmitLocalInvocationId(EmitContext& ctx) {
+ return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id);
+}
+
+Id EmitInvocationId(EmitContext& ctx) {
+ return ctx.OpLoad(ctx.U32[1], ctx.invocation_id);
+}
+
+Id EmitSampleId(EmitContext& ctx) {
+ return ctx.OpLoad(ctx.U32[1], ctx.sample_id);
+}
+
+Id EmitIsHelperInvocation(EmitContext& ctx) {
+ return ctx.OpLoad(ctx.U1, ctx.is_helper_invocation);
+}
+
+Id EmitYDirection(EmitContext& ctx) {
+ return ctx.Const(ctx.runtime_info.y_negate ? -1.0f : 1.0f);
+}
+
+Id EmitLoadLocal(EmitContext& ctx, Id word_offset) {
+ const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)};
+ return ctx.OpLoad(ctx.U32[1], pointer);
+}
+
+void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value) {
+ const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)};
+ ctx.OpStore(pointer, value);
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
new file mode 100644
index 000000000..d33486f28
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
@@ -0,0 +1,28 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+
+namespace Shader::Backend::SPIRV {
+
+void EmitJoin(EmitContext&) {
+ throw NotImplementedException("Join shouldn't be emitted");
+}
+
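+// Prefers OpDemoteToHelperInvocationEXT; otherwise falls back to OpKill placed
+// behind an always-taken conditional branch so the merge block keeps the CFG structured.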
+void EmitDemoteToHelperInvocation(EmitContext& ctx) {
+ if (ctx.profile.support_demote_to_helper_invocation) {
+ ctx.OpDemoteToHelperInvocationEXT();
+ } else {
+ const Id kill_label{ctx.OpLabel()};
+ const Id impossible_label{ctx.OpLabel()};
+ ctx.OpSelectionMerge(impossible_label, spv::SelectionControlMask::MaskNone);
+ ctx.OpBranchConditional(ctx.true_value, kill_label, impossible_label);
+ ctx.AddLabel(kill_label);
+ ctx.OpKill();
+ ctx.AddLabel(impossible_label);
+ }
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp
new file mode 100644
index 000000000..fd42b7a16
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp
@@ -0,0 +1,269 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
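+// Narrowing helpers: use native 8/16-bit integer types when the profile supports
+// them, otherwise emulate the conversion with a bit-field extract on a 32-bit value.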
+Id ExtractU16(EmitContext& ctx, Id value) {
+ if (ctx.profile.support_int16) {
+ return ctx.OpUConvert(ctx.U16, value);
+ } else {
+ return ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(16u));
+ }
+}
+
+Id ExtractS16(EmitContext& ctx, Id value) {
+ if (ctx.profile.support_int16) {
+ return ctx.OpSConvert(ctx.S16, value);
+ } else {
+ return ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(16u));
+ }
+}
+
+Id ExtractU8(EmitContext& ctx, Id value) {
+ if (ctx.profile.support_int8) {
+ return ctx.OpUConvert(ctx.U8, value);
+ } else {
+ return ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(8u));
+ }
+}
+
+Id ExtractS8(EmitContext& ctx, Id value) {
+ if (ctx.profile.support_int8) {
+ return ctx.OpSConvert(ctx.S8, value);
+ } else {
+ return ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(8u));
+ }
+}
+} // Anonymous namespace
+
+Id EmitConvertS16F16(EmitContext& ctx, Id value) {
+ if (ctx.profile.support_int16) {
+ return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value));
+ } else {
+ return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value));
+ }
+}
+
+Id EmitConvertS16F32(EmitContext& ctx, Id value) {
+ if (ctx.profile.support_int16) {
+ return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value));
+ } else {
+ return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value));
+ }
+}
+
+Id EmitConvertS16F64(EmitContext& ctx, Id value) {
+ if (ctx.profile.support_int16) {
+ return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value));
+ } else {
+ return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value));
+ }
+}
+
+Id EmitConvertS32F16(EmitContext& ctx, Id value) {
+ return ctx.OpConvertFToS(ctx.U32[1], value);
+}
+
+Id EmitConvertS32F32(EmitContext& ctx, Id value) {
+ if (ctx.profile.has_broken_signed_operations) {
+ return ctx.OpBitcast(ctx.U32[1], ctx.OpConvertFToS(ctx.S32[1], value));
+ } else {
+ return ctx.OpConvertFToS(ctx.U32[1], value);
+ }
+}
+
+Id EmitConvertS32F64(EmitContext& ctx, Id value) {
+ return ctx.OpConvertFToS(ctx.U32[1], value);
+}
+
+Id EmitConvertS64F16(EmitContext& ctx, Id value) {
+ return ctx.OpConvertFToS(ctx.U64, value);
+}
+
+Id EmitConvertS64F32(EmitContext& ctx, Id value) {
+ return ctx.OpConvertFToS(ctx.U64, value);
+}
+
+Id EmitConvertS64F64(EmitContext& ctx, Id value) {
+ return ctx.OpConvertFToS(ctx.U64, value);
+}
+
+Id EmitConvertU16F16(EmitContext& ctx, Id value) {
+ if (ctx.profile.support_int16) {
+ return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value));
+ } else {
+ return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value));
+ }
+}
+
+Id EmitConvertU16F32(EmitContext& ctx, Id value) {
+ if (ctx.profile.support_int16) {
+ return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value));
+ } else {
+ return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value));
+ }
+}
+
+Id EmitConvertU16F64(EmitContext& ctx, Id value) {
+ if (ctx.profile.support_int16) {
+ return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value));
+ } else {
+ return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value));
+ }
+}
+
+Id EmitConvertU32F16(EmitContext& ctx, Id value) {
+ return ctx.OpConvertFToU(ctx.U32[1], value);
+}
+
+Id EmitConvertU32F32(EmitContext& ctx, Id value) {
+ return ctx.OpConvertFToU(ctx.U32[1], value);
+}
+
+Id EmitConvertU32F64(EmitContext& ctx, Id value) {
+ return ctx.OpConvertFToU(ctx.U32[1], value);
+}
+
+Id EmitConvertU64F16(EmitContext& ctx, Id value) {
+ return ctx.OpConvertFToU(ctx.U64, value);
+}
+
+Id EmitConvertU64F32(EmitContext& ctx, Id value) {
+ return ctx.OpConvertFToU(ctx.U64, value);
+}
+
+Id EmitConvertU64F64(EmitContext& ctx, Id value) {
+ return ctx.OpConvertFToU(ctx.U64, value);
+}
+
+Id EmitConvertU64U32(EmitContext& ctx, Id value) {
+ return ctx.OpUConvert(ctx.U64, value);
+}
+
+Id EmitConvertU32U64(EmitContext& ctx, Id value) {
+ return ctx.OpUConvert(ctx.U32[1], value);
+}
+
+Id EmitConvertF16F32(EmitContext& ctx, Id value) {
+ return ctx.OpFConvert(ctx.F16[1], value);
+}
+
+Id EmitConvertF32F16(EmitContext& ctx, Id value) {
+ return ctx.OpFConvert(ctx.F32[1], value);
+}
+
+Id EmitConvertF32F64(EmitContext& ctx, Id value) {
+ return ctx.OpFConvert(ctx.F32[1], value);
+}
+
+Id EmitConvertF64F32(EmitContext& ctx, Id value) {
+ return ctx.OpFConvert(ctx.F64[1], value);
+}
+
+Id EmitConvertF16S8(EmitContext& ctx, Id value) {
+ return ctx.OpConvertSToF(ctx.F16[1], ExtractS8(ctx, value));
+}
+
+Id EmitConvertF16S16(EmitContext& ctx, Id value) {
+ return ctx.OpConvertSToF(ctx.F16[1], ExtractS16(ctx, value));
+}
+
+Id EmitConvertF16S32(EmitContext& ctx, Id value) {
+ return ctx.OpConvertSToF(ctx.F16[1], value);
+}
+
+Id EmitConvertF16S64(EmitContext& ctx, Id value) {
+ return ctx.OpConvertSToF(ctx.F16[1], value);
+}
+
+Id EmitConvertF16U8(EmitContext& ctx, Id value) {
+ return ctx.OpConvertUToF(ctx.F16[1], ExtractU8(ctx, value));
+}
+
+Id EmitConvertF16U16(EmitContext& ctx, Id value) {
+ return ctx.OpConvertUToF(ctx.F16[1], ExtractU16(ctx, value));
+}
+
+Id EmitConvertF16U32(EmitContext& ctx, Id value) {
+ return ctx.OpConvertUToF(ctx.F16[1], value);
+}
+
+Id EmitConvertF16U64(EmitContext& ctx, Id value) {
+ return ctx.OpConvertUToF(ctx.F16[1], value);
+}
+
+Id EmitConvertF32S8(EmitContext& ctx, Id value) {
+ return ctx.OpConvertSToF(ctx.F32[1], ExtractS8(ctx, value));
+}
+
+Id EmitConvertF32S16(EmitContext& ctx, Id value) {
+ return ctx.OpConvertSToF(ctx.F32[1], ExtractS16(ctx, value));
+}
+
+Id EmitConvertF32S32(EmitContext& ctx, Id value) {
+ if (ctx.profile.has_broken_signed_operations) {
+ value = ctx.OpBitcast(ctx.S32[1], value);
+ }
+ return ctx.OpConvertSToF(ctx.F32[1], value);
+}
+
+Id EmitConvertF32S64(EmitContext& ctx, Id value) {
+ return ctx.OpConvertSToF(ctx.F32[1], value);
+}
+
+Id EmitConvertF32U8(EmitContext& ctx, Id value) {
+ return ctx.OpConvertUToF(ctx.F32[1], ExtractU8(ctx, value));
+}
+
+Id EmitConvertF32U16(EmitContext& ctx, Id value) {
+ return ctx.OpConvertUToF(ctx.F32[1], ExtractU16(ctx, value));
+}
+
+Id EmitConvertF32U32(EmitContext& ctx, Id value) {
+ return ctx.OpConvertUToF(ctx.F32[1], value);
+}
+
+Id EmitConvertF32U64(EmitContext& ctx, Id value) {
+ return ctx.OpConvertUToF(ctx.F32[1], value);
+}
+
+Id EmitConvertF64S8(EmitContext& ctx, Id value) {
+ return ctx.OpConvertSToF(ctx.F64[1], ExtractS8(ctx, value));
+}
+
+Id EmitConvertF64S16(EmitContext& ctx, Id value) {
+ return ctx.OpConvertSToF(ctx.F64[1], ExtractS16(ctx, value));
+}
+
+Id EmitConvertF64S32(EmitContext& ctx, Id value) {
+ if (ctx.profile.has_broken_signed_operations) {
+ value = ctx.OpBitcast(ctx.S32[1], value);
+ }
+ return ctx.OpConvertSToF(ctx.F64[1], value);
+}
+
+Id EmitConvertF64S64(EmitContext& ctx, Id value) {
+ return ctx.OpConvertSToF(ctx.F64[1], value);
+}
+
+Id EmitConvertF64U8(EmitContext& ctx, Id value) {
+ return ctx.OpConvertUToF(ctx.F64[1], ExtractU8(ctx, value));
+}
+
+Id EmitConvertF64U16(EmitContext& ctx, Id value) {
+ return ctx.OpConvertUToF(ctx.F64[1], ExtractU16(ctx, value));
+}
+
+Id EmitConvertF64U32(EmitContext& ctx, Id value) {
+ return ctx.OpConvertUToF(ctx.F64[1], value);
+}
+
+Id EmitConvertF64U64(EmitContext& ctx, Id value) {
+ return ctx.OpConvertUToF(ctx.F64[1], value);
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
new file mode 100644
index 000000000..61cf25f9c
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
@@ -0,0 +1,396 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
+Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) {
+ const auto flags{inst->Flags<IR::FpControl>()};
+ if (flags.no_contraction) {
+ ctx.Decorate(op, spv::Decoration::NoContraction);
+ }
+ return op;
+}
+
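+// Some drivers miscompile OpFClamp, so clamp through FMax/FMin on those profiles.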
+Id Clamp(EmitContext& ctx, Id type, Id value, Id zero, Id one) {
+ if (ctx.profile.has_broken_spirv_clamp) {
+ return ctx.OpFMin(type, ctx.OpFMax(type, value, zero), one);
+ } else {
+ return ctx.OpFClamp(type, value, zero, one);
+ }
+}
+
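+// Ordered not-equal for drivers that mishandle NaNs in float comparisons: compare,
+// then mask out the result when either operand is NaN.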
+Id FPOrdNotEqual(EmitContext& ctx, Id lhs, Id rhs) {
+ if (ctx.profile.ignore_nan_fp_comparisons) {
+        const Id comp{ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs)};
+ const Id lhs_not_nan{ctx.OpLogicalNot(ctx.U1, ctx.OpIsNan(ctx.U1, lhs))};
+ const Id rhs_not_nan{ctx.OpLogicalNot(ctx.U1, ctx.OpIsNan(ctx.U1, rhs))};
+ return ctx.OpLogicalAnd(ctx.U1, ctx.OpLogicalAnd(ctx.U1, comp, lhs_not_nan), rhs_not_nan);
+ } else {
+ return ctx.OpFOrdNotEqual(ctx.U1, lhs, rhs);
+ }
+}
+
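+// Unordered comparisons for drivers that mishandle NaNs: OR the comparison result
+// with explicit IsNan checks so a NaN operand still yields true.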
+Id FPUnordCompare(Id (EmitContext::*comp_func)(Id, Id, Id), EmitContext& ctx, Id lhs, Id rhs) {
+ if (ctx.profile.ignore_nan_fp_comparisons) {
+ const Id lhs_nan{ctx.OpIsNan(ctx.U1, lhs)};
+ const Id rhs_nan{ctx.OpIsNan(ctx.U1, rhs)};
+ const Id comp{(ctx.*comp_func)(ctx.U1, lhs, rhs)};
+ return ctx.OpLogicalOr(ctx.U1, ctx.OpLogicalOr(ctx.U1, comp, lhs_nan), rhs_nan);
+ } else {
+ return (ctx.*comp_func)(ctx.U1, lhs, rhs);
+ }
+}
+} // Anonymous namespace
+
+Id EmitFPAbs16(EmitContext& ctx, Id value) {
+ return ctx.OpFAbs(ctx.F16[1], value);
+}
+
+Id EmitFPAbs32(EmitContext& ctx, Id value) {
+ return ctx.OpFAbs(ctx.F32[1], value);
+}
+
+Id EmitFPAbs64(EmitContext& ctx, Id value) {
+ return ctx.OpFAbs(ctx.F64[1], value);
+}
+
+Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+ return Decorate(ctx, inst, ctx.OpFAdd(ctx.F16[1], a, b));
+}
+
+Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+ return Decorate(ctx, inst, ctx.OpFAdd(ctx.F32[1], a, b));
+}
+
+Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+ return Decorate(ctx, inst, ctx.OpFAdd(ctx.F64[1], a, b));
+}
+
+Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
+ return Decorate(ctx, inst, ctx.OpFma(ctx.F16[1], a, b, c));
+}
+
+Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
+ return Decorate(ctx, inst, ctx.OpFma(ctx.F32[1], a, b, c));
+}
+
+Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
+ return Decorate(ctx, inst, ctx.OpFma(ctx.F64[1], a, b, c));
+}
+
+Id EmitFPMax32(EmitContext& ctx, Id a, Id b) {
+ return ctx.OpFMax(ctx.F32[1], a, b);
+}
+
+Id EmitFPMax64(EmitContext& ctx, Id a, Id b) {
+ return ctx.OpFMax(ctx.F64[1], a, b);
+}
+
+Id EmitFPMin32(EmitContext& ctx, Id a, Id b) {
+ return ctx.OpFMin(ctx.F32[1], a, b);
+}
+
+Id EmitFPMin64(EmitContext& ctx, Id a, Id b) {
+ return ctx.OpFMin(ctx.F64[1], a, b);
+}
+
+Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+ return Decorate(ctx, inst, ctx.OpFMul(ctx.F16[1], a, b));
+}
+
+Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+ return Decorate(ctx, inst, ctx.OpFMul(ctx.F32[1], a, b));
+}
+
+Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+ return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b));
+}
+
+Id EmitFPNeg16(EmitContext& ctx, Id value) {
+ return ctx.OpFNegate(ctx.F16[1], value);
+}
+
+Id EmitFPNeg32(EmitContext& ctx, Id value) {
+ return ctx.OpFNegate(ctx.F32[1], value);
+}
+
+Id EmitFPNeg64(EmitContext& ctx, Id value) {
+ return ctx.OpFNegate(ctx.F64[1], value);
+}
+
+Id EmitFPSin(EmitContext& ctx, Id value) {
+ return ctx.OpSin(ctx.F32[1], value);
+}
+
+Id EmitFPCos(EmitContext& ctx, Id value) {
+ return ctx.OpCos(ctx.F32[1], value);
+}
+
+Id EmitFPExp2(EmitContext& ctx, Id value) {
+ return ctx.OpExp2(ctx.F32[1], value);
+}
+
+Id EmitFPLog2(EmitContext& ctx, Id value) {
+ return ctx.OpLog2(ctx.F32[1], value);
+}
+
+Id EmitFPRecip32(EmitContext& ctx, Id value) {
+ return ctx.OpFDiv(ctx.F32[1], ctx.Const(1.0f), value);
+}
+
+Id EmitFPRecip64(EmitContext& ctx, Id value) {
+    return ctx.OpFDiv(ctx.F64[1], ctx.Constant(ctx.F64[1], f64{1.0}), value);
+}
+
+Id EmitFPRecipSqrt32(EmitContext& ctx, Id value) {
+ return ctx.OpInverseSqrt(ctx.F32[1], value);
+}
+
+Id EmitFPRecipSqrt64(EmitContext& ctx, Id value) {
+ return ctx.OpInverseSqrt(ctx.F64[1], value);
+}
+
+Id EmitFPSqrt(EmitContext& ctx, Id value) {
+ return ctx.OpSqrt(ctx.F32[1], value);
+}
+
+Id EmitFPSaturate16(EmitContext& ctx, Id value) {
+ const Id zero{ctx.Constant(ctx.F16[1], u16{0})};
+ const Id one{ctx.Constant(ctx.F16[1], u16{0x3c00})};
+ return Clamp(ctx, ctx.F16[1], value, zero, one);
+}
+
+Id EmitFPSaturate32(EmitContext& ctx, Id value) {
+ const Id zero{ctx.Const(f32{0.0})};
+ const Id one{ctx.Const(f32{1.0})};
+ return Clamp(ctx, ctx.F32[1], value, zero, one);
+}
+
+Id EmitFPSaturate64(EmitContext& ctx, Id value) {
+ const Id zero{ctx.Constant(ctx.F64[1], f64{0.0})};
+ const Id one{ctx.Constant(ctx.F64[1], f64{1.0})};
+ return Clamp(ctx, ctx.F64[1], value, zero, one);
+}
+
+Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value) {
+ return Clamp(ctx, ctx.F16[1], value, min_value, max_value);
+}
+
+Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value) {
+ return Clamp(ctx, ctx.F32[1], value, min_value, max_value);
+}
+
+Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value) {
+ return Clamp(ctx, ctx.F64[1], value, min_value, max_value);
+}
+
+Id EmitFPRoundEven16(EmitContext& ctx, Id value) {
+ return ctx.OpRoundEven(ctx.F16[1], value);
+}
+
+Id EmitFPRoundEven32(EmitContext& ctx, Id value) {
+ return ctx.OpRoundEven(ctx.F32[1], value);
+}
+
+Id EmitFPRoundEven64(EmitContext& ctx, Id value) {
+ return ctx.OpRoundEven(ctx.F64[1], value);
+}
+
+Id EmitFPFloor16(EmitContext& ctx, Id value) {
+ return ctx.OpFloor(ctx.F16[1], value);
+}
+
+Id EmitFPFloor32(EmitContext& ctx, Id value) {
+ return ctx.OpFloor(ctx.F32[1], value);
+}
+
+Id EmitFPFloor64(EmitContext& ctx, Id value) {
+ return ctx.OpFloor(ctx.F64[1], value);
+}
+
+Id EmitFPCeil16(EmitContext& ctx, Id value) {
+ return ctx.OpCeil(ctx.F16[1], value);
+}
+
+Id EmitFPCeil32(EmitContext& ctx, Id value) {
+ return ctx.OpCeil(ctx.F32[1], value);
+}
+
+Id EmitFPCeil64(EmitContext& ctx, Id value) {
+ return ctx.OpCeil(ctx.F64[1], value);
+}
+
+Id EmitFPTrunc16(EmitContext& ctx, Id value) {
+ return ctx.OpTrunc(ctx.F16[1], value);
+}
+
+Id EmitFPTrunc32(EmitContext& ctx, Id value) {
+ return ctx.OpTrunc(ctx.F32[1], value);
+}
+
+Id EmitFPTrunc64(EmitContext& ctx, Id value) {
+ return ctx.OpTrunc(ctx.F64[1], value);
+}
+
+Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFOrdEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFOrdEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFOrdEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPUnordCompare(&EmitContext::OpFUnordEqual, ctx, lhs, rhs);
+}
+
+Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPUnordCompare(&EmitContext::OpFUnordEqual, ctx, lhs, rhs);
+}
+
+Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPUnordCompare(&EmitContext::OpFUnordEqual, ctx, lhs, rhs);
+}
+
+Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPOrdNotEqual(ctx, lhs, rhs);
+}
+
+Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPOrdNotEqual(ctx, lhs, rhs);
+}
+
+Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPOrdNotEqual(ctx, lhs, rhs);
+}
+
+Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPUnordCompare(&EmitContext::OpFUnordLessThan, ctx, lhs, rhs);
+}
+
+Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPUnordCompare(&EmitContext::OpFUnordLessThan, ctx, lhs, rhs);
+}
+
+Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPUnordCompare(&EmitContext::OpFUnordLessThan, ctx, lhs, rhs);
+}
+
+Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPUnordCompare(&EmitContext::OpFUnordGreaterThan, ctx, lhs, rhs);
+}
+
+Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPUnordCompare(&EmitContext::OpFUnordGreaterThan, ctx, lhs, rhs);
+}
+
+Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPUnordCompare(&EmitContext::OpFUnordGreaterThan, ctx, lhs, rhs);
+}
+
+Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPUnordCompare(&EmitContext::OpFUnordLessThanEqual, ctx, lhs, rhs);
+}
+
+Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPUnordCompare(&EmitContext::OpFUnordLessThanEqual, ctx, lhs, rhs);
+}
+
+Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPUnordCompare(&EmitContext::OpFUnordLessThanEqual, ctx, lhs, rhs);
+}
+
+Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPUnordCompare(&EmitContext::OpFUnordGreaterThanEqual, ctx, lhs, rhs);
+}
+
+Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPUnordCompare(&EmitContext::OpFUnordGreaterThanEqual, ctx, lhs, rhs);
+}
+
+Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPUnordCompare(&EmitContext::OpFUnordGreaterThanEqual, ctx, lhs, rhs);
+}
+
+Id EmitFPIsNan16(EmitContext& ctx, Id value) {
+ return ctx.OpIsNan(ctx.U1, value);
+}
+
+Id EmitFPIsNan32(EmitContext& ctx, Id value) {
+ return ctx.OpIsNan(ctx.U1, value);
+}
+
+Id EmitFPIsNan64(EmitContext& ctx, Id value) {
+ return ctx.OpIsNan(ctx.U1, value);
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
new file mode 100644
index 000000000..3588f052b
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -0,0 +1,462 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <boost/container/static_vector.hpp>
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
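+// Builder that accumulates the SPIR-V image operand mask and the matching operand
+// ids (bias, lod, offsets, gradients, min lod, sample index) for image instructions.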
+class ImageOperands {
+public:
+ explicit ImageOperands(EmitContext& ctx, bool has_bias, bool has_lod, bool has_lod_clamp,
+ Id lod, const IR::Value& offset) {
+ if (has_bias) {
+ const Id bias{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod};
+ Add(spv::ImageOperandsMask::Bias, bias);
+ }
+ if (has_lod) {
+ const Id lod_value{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod};
+ Add(spv::ImageOperandsMask::Lod, lod_value);
+ }
+ AddOffset(ctx, offset);
+ if (has_lod_clamp) {
+ const Id lod_clamp{has_bias ? ctx.OpCompositeExtract(ctx.F32[1], lod, 1) : lod};
+ Add(spv::ImageOperandsMask::MinLod, lod_clamp);
+ }
+ }
+
+ explicit ImageOperands(EmitContext& ctx, const IR::Value& offset, const IR::Value& offset2) {
+ if (offset2.IsEmpty()) {
+ if (offset.IsEmpty()) {
+ return;
+ }
+ Add(spv::ImageOperandsMask::Offset, ctx.Def(offset));
+ return;
+ }
+ const std::array values{offset.InstRecursive(), offset2.InstRecursive()};
+ if (!values[0]->AreAllArgsImmediates() || !values[1]->AreAllArgsImmediates()) {
+ LOG_WARNING(Shader_SPIRV, "Not all arguments in PTP are immediate, ignoring");
+ return;
+ }
+ const IR::Opcode opcode{values[0]->GetOpcode()};
+ if (opcode != values[1]->GetOpcode() || opcode != IR::Opcode::CompositeConstructU32x4) {
+ throw LogicError("Invalid PTP arguments");
+ }
+ auto read{[&](unsigned int a, unsigned int b) { return values[a]->Arg(b).U32(); }};
+
+ const Id offsets{ctx.ConstantComposite(
+ ctx.TypeArray(ctx.U32[2], ctx.Const(4U)), ctx.Const(read(0, 0), read(0, 1)),
+ ctx.Const(read(0, 2), read(0, 3)), ctx.Const(read(1, 0), read(1, 1)),
+ ctx.Const(read(1, 2), read(1, 3)))};
+ Add(spv::ImageOperandsMask::ConstOffsets, offsets);
+ }
+
+ explicit ImageOperands(Id offset, Id lod, Id ms) {
+ if (Sirit::ValidId(lod)) {
+ Add(spv::ImageOperandsMask::Lod, lod);
+ }
+ if (Sirit::ValidId(offset)) {
+ Add(spv::ImageOperandsMask::Offset, offset);
+ }
+ if (Sirit::ValidId(ms)) {
+ Add(spv::ImageOperandsMask::Sample, ms);
+ }
+ }
+
+ explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivates, u32 num_derivates,
+ Id offset, Id lod_clamp) {
+ if (!Sirit::ValidId(derivates)) {
+            throw LogicError("Derivatives must be present");
+ }
+ boost::container::static_vector<Id, 3> deriv_x_accum;
+ boost::container::static_vector<Id, 3> deriv_y_accum;
+ for (u32 i = 0; i < num_derivates; ++i) {
+ deriv_x_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2));
+ deriv_y_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2 + 1));
+ }
+ const Id derivates_X{ctx.OpCompositeConstruct(
+ ctx.F32[num_derivates], std::span{deriv_x_accum.data(), deriv_x_accum.size()})};
+ const Id derivates_Y{ctx.OpCompositeConstruct(
+ ctx.F32[num_derivates], std::span{deriv_y_accum.data(), deriv_y_accum.size()})};
+ Add(spv::ImageOperandsMask::Grad, derivates_X, derivates_Y);
+ if (Sirit::ValidId(offset)) {
+ Add(spv::ImageOperandsMask::Offset, offset);
+ }
+ if (has_lod_clamp) {
+ Add(spv::ImageOperandsMask::MinLod, lod_clamp);
+ }
+ }
+
+ std::span<const Id> Span() const noexcept {
+ return std::span{operands.data(), operands.size()};
+ }
+
+ std::optional<spv::ImageOperandsMask> MaskOptional() const noexcept {
+ return mask != spv::ImageOperandsMask{} ? std::make_optional(mask) : std::nullopt;
+ }
+
+ spv::ImageOperandsMask Mask() const noexcept {
+ return mask;
+ }
+
+private:
+ void AddOffset(EmitContext& ctx, const IR::Value& offset) {
+ if (offset.IsEmpty()) {
+ return;
+ }
+ if (offset.IsImmediate()) {
+ Add(spv::ImageOperandsMask::ConstOffset, ctx.SConst(static_cast<s32>(offset.U32())));
+ return;
+ }
+ IR::Inst* const inst{offset.InstRecursive()};
+ if (inst->AreAllArgsImmediates()) {
+ switch (inst->GetOpcode()) {
+ case IR::Opcode::CompositeConstructU32x2:
+ Add(spv::ImageOperandsMask::ConstOffset,
+ ctx.SConst(static_cast<s32>(inst->Arg(0).U32()),
+ static_cast<s32>(inst->Arg(1).U32())));
+ return;
+ case IR::Opcode::CompositeConstructU32x3:
+ Add(spv::ImageOperandsMask::ConstOffset,
+ ctx.SConst(static_cast<s32>(inst->Arg(0).U32()),
+ static_cast<s32>(inst->Arg(1).U32()),
+ static_cast<s32>(inst->Arg(2).U32())));
+ return;
+ case IR::Opcode::CompositeConstructU32x4:
+ Add(spv::ImageOperandsMask::ConstOffset,
+ ctx.SConst(static_cast<s32>(inst->Arg(0).U32()),
+ static_cast<s32>(inst->Arg(1).U32()),
+ static_cast<s32>(inst->Arg(2).U32()),
+ static_cast<s32>(inst->Arg(3).U32())));
+ return;
+ default:
+ break;
+ }
+ }
+ Add(spv::ImageOperandsMask::Offset, ctx.Def(offset));
+ }
+
+ void Add(spv::ImageOperandsMask new_mask, Id value) {
+ mask = static_cast<spv::ImageOperandsMask>(static_cast<unsigned>(mask) |
+ static_cast<unsigned>(new_mask));
+ operands.push_back(value);
+ }
+
+ void Add(spv::ImageOperandsMask new_mask, Id value_1, Id value_2) {
+ mask = static_cast<spv::ImageOperandsMask>(static_cast<unsigned>(mask) |
+ static_cast<unsigned>(new_mask));
+ operands.push_back(value_1);
+ operands.push_back(value_2);
+ }
+
+ boost::container::static_vector<Id, 4> operands;
+ spv::ImageOperandsMask mask{};
+};
+
+Id Texture(EmitContext& ctx, IR::TextureInstInfo info, [[maybe_unused]] const IR::Value& index) {
+ const TextureDefinition& def{ctx.textures.at(info.descriptor_index)};
+ if (def.count > 1) {
+ const Id pointer{ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(index))};
+ return ctx.OpLoad(def.sampled_type, pointer);
+ } else {
+ return ctx.OpLoad(def.sampled_type, def.id);
+ }
+}
+
+Id TextureImage(EmitContext& ctx, IR::TextureInstInfo info, const IR::Value& index) {
+ if (!index.IsImmediate() || index.U32() != 0) {
+ throw NotImplementedException("Indirect image indexing");
+ }
+ if (info.type == TextureType::Buffer) {
+ const TextureBufferDefinition& def{ctx.texture_buffers.at(info.descriptor_index)};
+ if (def.count > 1) {
+ throw NotImplementedException("Indirect texture sample");
+ }
+ const Id sampler_id{def.id};
+ const Id id{ctx.OpLoad(ctx.sampled_texture_buffer_type, sampler_id)};
+ return ctx.OpImage(ctx.image_buffer_type, id);
+ } else {
+ const TextureDefinition& def{ctx.textures.at(info.descriptor_index)};
+ if (def.count > 1) {
+ throw NotImplementedException("Indirect texture sample");
+ }
+ return ctx.OpImage(def.image_type, ctx.OpLoad(def.sampled_type, def.id));
+ }
+}
+
+Id Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) {
+ if (!index.IsImmediate() || index.U32() != 0) {
+ throw NotImplementedException("Indirect image indexing");
+ }
+ if (info.type == TextureType::Buffer) {
+ const ImageBufferDefinition def{ctx.image_buffers.at(info.descriptor_index)};
+ return ctx.OpLoad(def.image_type, def.id);
+ } else {
+ const ImageDefinition def{ctx.images.at(info.descriptor_index)};
+ return ctx.OpLoad(def.image_type, def.id);
+ }
+}
+
+Id Decorate(EmitContext& ctx, IR::Inst* inst, Id sample) {
+ const auto info{inst->Flags<IR::TextureInstInfo>()};
+ if (info.relaxed_precision != 0) {
+ ctx.Decorate(sample, spv::Decoration::RelaxedPrecision);
+ }
+ return sample;
+}
+
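+// Dispatches an image instruction to its sparse or non-sparse variant. When the IR requested
+// sparse residency information, the sparse opcode is used, the residency code is exposed through
+// OpImageSparseTexelsResident, and the texel value is extracted from the result struct.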
+template <typename MethodPtrType, typename... Args>
+Id Emit(MethodPtrType sparse_ptr, MethodPtrType non_sparse_ptr, EmitContext& ctx, IR::Inst* inst,
+ Id result_type, Args&&... args) {
+ IR::Inst* const sparse{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+ if (!sparse) {
+ return Decorate(ctx, inst, (ctx.*non_sparse_ptr)(result_type, std::forward<Args>(args)...));
+ }
+ const Id struct_type{ctx.TypeStruct(ctx.U32[1], result_type)};
+ const Id sample{(ctx.*sparse_ptr)(struct_type, std::forward<Args>(args)...)};
+ const Id resident_code{ctx.OpCompositeExtract(ctx.U32[1], sample, 0U)};
+ sparse->SetDefinition(ctx.OpImageSparseTexelsResident(ctx.U1, resident_code));
+ sparse->Invalidate();
+ Decorate(ctx, inst, sample);
+ return ctx.OpCompositeExtract(result_type, sample, 1U);
+}
+} // Anonymous namespace
+
+Id EmitBindlessImageSampleImplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBindlessImageSampleExplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBindlessImageGather(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBindlessImageGatherDref(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBindlessImageFetch(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBindlessImageQueryDimensions(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBindlessImageQueryLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBindlessImageGradient(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBindlessImageRead(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBindlessImageWrite(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBoundImageSampleImplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBoundImageSampleExplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBoundImageSampleDrefImplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBoundImageSampleDrefExplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBoundImageGather(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBoundImageGatherDref(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBoundImageFetch(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBoundImageQueryDimensions(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBoundImageQueryLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBoundImageGradient(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBoundImageRead(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBoundImageWrite(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id bias_lc, const IR::Value& offset) {
+ const auto info{inst->Flags<IR::TextureInstInfo>()};
+ if (ctx.stage == Stage::Fragment) {
+ const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0,
+ bias_lc, offset);
+ return Emit(&EmitContext::OpImageSparseSampleImplicitLod,
+ &EmitContext::OpImageSampleImplicitLod, ctx, inst, ctx.F32[4],
+ Texture(ctx, info, index), coords, operands.MaskOptional(), operands.Span());
+ } else {
+        // We can't use implicit LODs on non-fragment stages in SPIR-V. Maxwell hardware behaves
+        // as if the LOD was explicitly zero. This may change on Turing with implicit compute
+        // derivatives.
+ const Id lod{ctx.Const(0.0f)};
+ const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod, offset);
+ return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
+ &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
+ Texture(ctx, info, index), coords, operands.Mask(), operands.Span());
+ }
+}
+
+Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id lod, const IR::Value& offset) {
+ const auto info{inst->Flags<IR::TextureInstInfo>()};
+ const ImageOperands operands(ctx, false, true, false, lod, offset);
+ return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
+ &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
+ Texture(ctx, info, index), coords, operands.Mask(), operands.Span());
+}
+
+Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
+ Id coords, Id dref, Id bias_lc, const IR::Value& offset) {
+ const auto info{inst->Flags<IR::TextureInstInfo>()};
+ const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, bias_lc,
+ offset);
+ return Emit(&EmitContext::OpImageSparseSampleDrefImplicitLod,
+ &EmitContext::OpImageSampleDrefImplicitLod, ctx, inst, ctx.F32[1],
+ Texture(ctx, info, index), coords, dref, operands.MaskOptional(), operands.Span());
+}
+
+Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
+ Id coords, Id dref, Id lod, const IR::Value& offset) {
+ const auto info{inst->Flags<IR::TextureInstInfo>()};
+ const ImageOperands operands(ctx, false, true, false, lod, offset);
+ return Emit(&EmitContext::OpImageSparseSampleDrefExplicitLod,
+ &EmitContext::OpImageSampleDrefExplicitLod, ctx, inst, ctx.F32[1],
+ Texture(ctx, info, index), coords, dref, operands.Mask(), operands.Span());
+}
+
+Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ const IR::Value& offset, const IR::Value& offset2) {
+ const auto info{inst->Flags<IR::TextureInstInfo>()};
+ const ImageOperands operands(ctx, offset, offset2);
+ return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst,
+ ctx.F32[4], Texture(ctx, info, index), coords, ctx.Const(info.gather_component),
+ operands.MaskOptional(), operands.Span());
+}
+
+Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ const IR::Value& offset, const IR::Value& offset2, Id dref) {
+ const auto info{inst->Flags<IR::TextureInstInfo>()};
+ const ImageOperands operands(ctx, offset, offset2);
+ return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst,
+ ctx.F32[4], Texture(ctx, info, index), coords, dref, operands.MaskOptional(),
+ operands.Span());
+}
+
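+// Texel fetches from buffers carry no LOD; the operand is dropped before building the image
+// operands.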
+Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
+ Id lod, Id ms) {
+ const auto info{inst->Flags<IR::TextureInstInfo>()};
+ if (info.type == TextureType::Buffer) {
+ lod = Id{};
+ }
+ const ImageOperands operands(offset, lod, ms);
+ return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4],
+ TextureImage(ctx, info, index), coords, operands.MaskOptional(), operands.Span());
+}
+
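+// Dimension queries always produce a uvec4: the queried size in the leading components, zero
+// padding for the unused components, and the number of mip levels last.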
+Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod) {
+ const auto info{inst->Flags<IR::TextureInstInfo>()};
+ const Id image{TextureImage(ctx, info, index)};
+ const Id zero{ctx.u32_zero_value};
+ const auto mips{[&] { return ctx.OpImageQueryLevels(ctx.U32[1], image); }};
+ switch (info.type) {
+ case TextureType::Color1D:
+ return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[1], image, lod),
+ zero, zero, mips());
+ case TextureType::ColorArray1D:
+ case TextureType::Color2D:
+ case TextureType::ColorCube:
+ return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[2], image, lod),
+ zero, mips());
+ case TextureType::ColorArray2D:
+ case TextureType::Color3D:
+ case TextureType::ColorArrayCube:
+ return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[3], image, lod),
+ mips());
+ case TextureType::Buffer:
+ return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySize(ctx.U32[1], image), zero,
+ zero, mips());
+ }
+ throw LogicError("Unspecified image type {}", info.type.Value());
+}
+
+Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {
+ const auto info{inst->Flags<IR::TextureInstInfo>()};
+ const Id zero{ctx.f32_zero_value};
+ const Id sampler{Texture(ctx, info, index)};
+ return ctx.OpCompositeConstruct(ctx.F32[4], ctx.OpImageQueryLod(ctx.F32[2], sampler, coords),
+ zero, zero);
+}
+
+Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id derivates, Id offset, Id lod_clamp) {
+ const auto info{inst->Flags<IR::TextureInstInfo>()};
+ const ImageOperands operands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates,
+ offset, lod_clamp);
+ return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
+ &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
+ Texture(ctx, info, index), coords, operands.Mask(), operands.Span());
+}
+
+Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {
+ const auto info{inst->Flags<IR::TextureInstInfo>()};
+ if (info.image_format == ImageFormat::Typeless && !ctx.profile.support_typeless_image_loads) {
+ LOG_WARNING(Shader_SPIRV, "Typeless image read not supported by host");
+ return ctx.ConstantNull(ctx.U32[4]);
+ }
+ return Emit(&EmitContext::OpImageSparseRead, &EmitContext::OpImageRead, ctx, inst, ctx.U32[4],
+ Image(ctx, index, info), coords, std::nullopt, std::span<const Id>{});
+}
+
+void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color) {
+ const auto info{inst->Flags<IR::TextureInstInfo>()};
+ ctx.OpImageWrite(Image(ctx, index, info), coords, color);
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp
new file mode 100644
index 000000000..d7f1a365a
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp
@@ -0,0 +1,183 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
+Id Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) {
+ if (!index.IsImmediate()) {
+ throw NotImplementedException("Indirect image indexing");
+ }
+ if (info.type == TextureType::Buffer) {
+ const ImageBufferDefinition def{ctx.image_buffers.at(index.U32())};
+ return def.id;
+ } else {
+ const ImageDefinition def{ctx.images.at(index.U32())};
+ return def.id;
+ }
+}
+
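+// Image atomics use Device scope with no additional memory semantics (relaxed ordering).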
+std::pair<Id, Id> AtomicArgs(EmitContext& ctx) {
+ const Id scope{ctx.Const(static_cast<u32>(spv::Scope::Device))};
+ const Id semantics{ctx.u32_zero_value};
+ return {scope, semantics};
+}
+
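+// Shared path for 32-bit image atomics: build an OpImageTexelPointer into the image (sample
+// index 0) and invoke the requested Sirit atomic member function on it.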
+Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id value,
+ Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
+ const auto info{inst->Flags<IR::TextureInstInfo>()};
+ const Id image{Image(ctx, index, info)};
+ const Id pointer{ctx.OpImageTexelPointer(ctx.image_u32, image, coords, ctx.Const(0U))};
+ const auto [scope, semantics]{AtomicArgs(ctx)};
+ return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
+}
+} // Anonymous namespace
+
+Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value) {
+ return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicIAdd);
+}
+
+Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value) {
+ return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicSMin);
+}
+
+Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value) {
+ return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicUMin);
+}
+
+Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value) {
+ return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicSMax);
+}
+
+Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value) {
+ return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicUMax);
+}
+
+Id EmitImageAtomicInc32(EmitContext&, IR::Inst*, const IR::Value&, Id, Id) {
+ // TODO: This is not yet implemented
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitImageAtomicDec32(EmitContext&, IR::Inst*, const IR::Value&, Id, Id) {
+ // TODO: This is not yet implemented
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value) {
+ return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicAnd);
+}
+
+Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value) {
+ return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicOr);
+}
+
+Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value) {
+ return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicXor);
+}
+
+Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value) {
+ return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicExchange);
+}
+
+Id EmitBindlessImageAtomicIAdd32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBindlessImageAtomicSMin32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBindlessImageAtomicUMin32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBindlessImageAtomicSMax32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBindlessImageAtomicUMax32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBindlessImageAtomicInc32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBindlessImageAtomicDec32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBindlessImageAtomicAnd32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBindlessImageAtomicOr32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBindlessImageAtomicXor32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBindlessImageAtomicExchange32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBoundImageAtomicIAdd32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBoundImageAtomicSMin32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBoundImageAtomicUMin32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBoundImageAtomicSMax32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBoundImageAtomicUMax32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBoundImageAtomicInc32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBoundImageAtomicDec32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBoundImageAtomicAnd32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBoundImageAtomicOr32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBoundImageAtomicXor32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBoundImageAtomicExchange32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
new file mode 100644
index 000000000..f99c02848
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -0,0 +1,579 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <sirit/sirit.h>
+
+#include "common/common_types.h"
+
+namespace Shader::IR {
+enum class Attribute : u64;
+enum class Patch : u64;
+class Inst;
+class Value;
+} // namespace Shader::IR
+
+namespace Shader::Backend::SPIRV {
+
+using Sirit::Id;
+
+class EmitContext;
+
+// Microinstruction emitters
+Id EmitPhi(EmitContext& ctx, IR::Inst* inst);
+void EmitVoid(EmitContext& ctx);
+Id EmitIdentity(EmitContext& ctx, const IR::Value& value);
+Id EmitConditionRef(EmitContext& ctx, const IR::Value& value);
+void EmitReference(EmitContext&);
+void EmitPhiMove(EmitContext&);
+void EmitJoin(EmitContext& ctx);
+void EmitDemoteToHelperInvocation(EmitContext& ctx);
+void EmitBarrier(EmitContext& ctx);
+void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
+void EmitDeviceMemoryBarrier(EmitContext& ctx);
+void EmitPrologue(EmitContext& ctx);
+void EmitEpilogue(EmitContext& ctx);
+void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream);
+void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream);
+void EmitGetRegister(EmitContext& ctx);
+void EmitSetRegister(EmitContext& ctx);
+void EmitGetPred(EmitContext& ctx);
+void EmitSetPred(EmitContext& ctx);
+void EmitSetGotoVariable(EmitContext& ctx);
+void EmitGetGotoVariable(EmitContext& ctx);
+void EmitSetIndirectBranchVariable(EmitContext& ctx);
+void EmitGetIndirectBranchVariable(EmitContext& ctx);
+Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex);
+void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex);
+Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex);
+void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex);
+Id EmitGetPatch(EmitContext& ctx, IR::Patch patch);
+void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value);
+void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value);
+void EmitSetSampleMask(EmitContext& ctx, Id value);
+void EmitSetFragDepth(EmitContext& ctx, Id value);
+void EmitGetZFlag(EmitContext& ctx);
+void EmitGetSFlag(EmitContext& ctx);
+void EmitGetCFlag(EmitContext& ctx);
+void EmitGetOFlag(EmitContext& ctx);
+void EmitSetZFlag(EmitContext& ctx);
+void EmitSetSFlag(EmitContext& ctx);
+void EmitSetCFlag(EmitContext& ctx);
+void EmitSetOFlag(EmitContext& ctx);
+Id EmitWorkgroupId(EmitContext& ctx);
+Id EmitLocalInvocationId(EmitContext& ctx);
+Id EmitInvocationId(EmitContext& ctx);
+Id EmitSampleId(EmitContext& ctx);
+Id EmitIsHelperInvocation(EmitContext& ctx);
+Id EmitYDirection(EmitContext& ctx);
+Id EmitLoadLocal(EmitContext& ctx, Id word_offset);
+void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value);
+Id EmitUndefU1(EmitContext& ctx);
+Id EmitUndefU8(EmitContext& ctx);
+Id EmitUndefU16(EmitContext& ctx);
+Id EmitUndefU32(EmitContext& ctx);
+Id EmitUndefU64(EmitContext& ctx);
+void EmitLoadGlobalU8(EmitContext& ctx);
+void EmitLoadGlobalS8(EmitContext& ctx);
+void EmitLoadGlobalU16(EmitContext& ctx);
+void EmitLoadGlobalS16(EmitContext& ctx);
+Id EmitLoadGlobal32(EmitContext& ctx, Id address);
+Id EmitLoadGlobal64(EmitContext& ctx, Id address);
+Id EmitLoadGlobal128(EmitContext& ctx, Id address);
+void EmitWriteGlobalU8(EmitContext& ctx);
+void EmitWriteGlobalS8(EmitContext& ctx);
+void EmitWriteGlobalU16(EmitContext& ctx);
+void EmitWriteGlobalS16(EmitContext& ctx);
+void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value);
+void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value);
+void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value);
+Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitLoadSharedU8(EmitContext& ctx, Id offset);
+Id EmitLoadSharedS8(EmitContext& ctx, Id offset);
+Id EmitLoadSharedU16(EmitContext& ctx, Id offset);
+Id EmitLoadSharedS16(EmitContext& ctx, Id offset);
+Id EmitLoadSharedU32(EmitContext& ctx, Id offset);
+Id EmitLoadSharedU64(EmitContext& ctx, Id offset);
+Id EmitLoadSharedU128(EmitContext& ctx, Id offset);
+void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value);
+void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value);
+void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value);
+void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value);
+void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value);
+Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2);
+Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
+Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
+Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2);
+Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3);
+Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
+Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2);
+Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
+Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
+Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index);
+void EmitCompositeConstructF64x2(EmitContext& ctx);
+void EmitCompositeConstructF64x3(EmitContext& ctx);
+void EmitCompositeConstructF64x4(EmitContext& ctx);
+void EmitCompositeExtractF64x2(EmitContext& ctx);
+void EmitCompositeExtractF64x3(EmitContext& ctx);
+void EmitCompositeExtractF64x4(EmitContext& ctx);
+Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+void EmitBitCastU16F16(EmitContext& ctx);
+Id EmitBitCastU32F32(EmitContext& ctx, Id value);
+void EmitBitCastU64F64(EmitContext& ctx);
+void EmitBitCastF16U16(EmitContext& ctx);
+Id EmitBitCastF32U32(EmitContext& ctx, Id value);
+void EmitBitCastF64U64(EmitContext& ctx);
+Id EmitPackUint2x32(EmitContext& ctx, Id value);
+Id EmitUnpackUint2x32(EmitContext& ctx, Id value);
+Id EmitPackFloat2x16(EmitContext& ctx, Id value);
+Id EmitUnpackFloat2x16(EmitContext& ctx, Id value);
+Id EmitPackHalf2x16(EmitContext& ctx, Id value);
+Id EmitUnpackHalf2x16(EmitContext& ctx, Id value);
+Id EmitPackDouble2x32(EmitContext& ctx, Id value);
+Id EmitUnpackDouble2x32(EmitContext& ctx, Id value);
+void EmitGetZeroFromOp(EmitContext& ctx);
+void EmitGetSignFromOp(EmitContext& ctx);
+void EmitGetCarryFromOp(EmitContext& ctx);
+void EmitGetOverflowFromOp(EmitContext& ctx);
+void EmitGetSparseFromOp(EmitContext& ctx);
+void EmitGetInBoundsFromOp(EmitContext& ctx);
+Id EmitFPAbs16(EmitContext& ctx, Id value);
+Id EmitFPAbs32(EmitContext& ctx, Id value);
+Id EmitFPAbs64(EmitContext& ctx, Id value);
+Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
+Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
+Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
+Id EmitFPMax32(EmitContext& ctx, Id a, Id b);
+Id EmitFPMax64(EmitContext& ctx, Id a, Id b);
+Id EmitFPMin32(EmitContext& ctx, Id a, Id b);
+Id EmitFPMin64(EmitContext& ctx, Id a, Id b);
+Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPNeg16(EmitContext& ctx, Id value);
+Id EmitFPNeg32(EmitContext& ctx, Id value);
+Id EmitFPNeg64(EmitContext& ctx, Id value);
+Id EmitFPSin(EmitContext& ctx, Id value);
+Id EmitFPCos(EmitContext& ctx, Id value);
+Id EmitFPExp2(EmitContext& ctx, Id value);
+Id EmitFPLog2(EmitContext& ctx, Id value);
+Id EmitFPRecip32(EmitContext& ctx, Id value);
+Id EmitFPRecip64(EmitContext& ctx, Id value);
+Id EmitFPRecipSqrt32(EmitContext& ctx, Id value);
+Id EmitFPRecipSqrt64(EmitContext& ctx, Id value);
+Id EmitFPSqrt(EmitContext& ctx, Id value);
+Id EmitFPSaturate16(EmitContext& ctx, Id value);
+Id EmitFPSaturate32(EmitContext& ctx, Id value);
+Id EmitFPSaturate64(EmitContext& ctx, Id value);
+Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value);
+Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value);
+Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value);
+Id EmitFPRoundEven16(EmitContext& ctx, Id value);
+Id EmitFPRoundEven32(EmitContext& ctx, Id value);
+Id EmitFPRoundEven64(EmitContext& ctx, Id value);
+Id EmitFPFloor16(EmitContext& ctx, Id value);
+Id EmitFPFloor32(EmitContext& ctx, Id value);
+Id EmitFPFloor64(EmitContext& ctx, Id value);
+Id EmitFPCeil16(EmitContext& ctx, Id value);
+Id EmitFPCeil32(EmitContext& ctx, Id value);
+Id EmitFPCeil64(EmitContext& ctx, Id value);
+Id EmitFPTrunc16(EmitContext& ctx, Id value);
+Id EmitFPTrunc32(EmitContext& ctx, Id value);
+Id EmitFPTrunc64(EmitContext& ctx, Id value);
+Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPIsNan16(EmitContext& ctx, Id value);
+Id EmitFPIsNan32(EmitContext& ctx, Id value);
+Id EmitFPIsNan64(EmitContext& ctx, Id value);
+Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitIAdd64(EmitContext& ctx, Id a, Id b);
+Id EmitISub32(EmitContext& ctx, Id a, Id b);
+Id EmitISub64(EmitContext& ctx, Id a, Id b);
+Id EmitIMul32(EmitContext& ctx, Id a, Id b);
+Id EmitINeg32(EmitContext& ctx, Id value);
+Id EmitINeg64(EmitContext& ctx, Id value);
+Id EmitIAbs32(EmitContext& ctx, Id value);
+Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift);
+Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift);
+Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift);
+Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift);
+Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift);
+Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift);
+Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count);
+Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count);
+Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count);
+Id EmitBitReverse32(EmitContext& ctx, Id value);
+Id EmitBitCount32(EmitContext& ctx, Id value);
+Id EmitBitwiseNot32(EmitContext& ctx, Id value);
+Id EmitFindSMsb32(EmitContext& ctx, Id value);
+Id EmitFindUMsb32(EmitContext& ctx, Id value);
+Id EmitSMin32(EmitContext& ctx, Id a, Id b);
+Id EmitUMin32(EmitContext& ctx, Id a, Id b);
+Id EmitSMax32(EmitContext& ctx, Id a, Id b);
+Id EmitUMax32(EmitContext& ctx, Id a, Id b);
+Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max);
+Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max);
+Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitGlobalAtomicIAdd32(EmitContext& ctx);
+Id EmitGlobalAtomicSMin32(EmitContext& ctx);
+Id EmitGlobalAtomicUMin32(EmitContext& ctx);
+Id EmitGlobalAtomicSMax32(EmitContext& ctx);
+Id EmitGlobalAtomicUMax32(EmitContext& ctx);
+Id EmitGlobalAtomicInc32(EmitContext& ctx);
+Id EmitGlobalAtomicDec32(EmitContext& ctx);
+Id EmitGlobalAtomicAnd32(EmitContext& ctx);
+Id EmitGlobalAtomicOr32(EmitContext& ctx);
+Id EmitGlobalAtomicXor32(EmitContext& ctx);
+Id EmitGlobalAtomicExchange32(EmitContext& ctx);
+Id EmitGlobalAtomicIAdd64(EmitContext& ctx);
+Id EmitGlobalAtomicSMin64(EmitContext& ctx);
+Id EmitGlobalAtomicUMin64(EmitContext& ctx);
+Id EmitGlobalAtomicSMax64(EmitContext& ctx);
+Id EmitGlobalAtomicUMax64(EmitContext& ctx);
+Id EmitGlobalAtomicInc64(EmitContext& ctx);
+Id EmitGlobalAtomicDec64(EmitContext& ctx);
+Id EmitGlobalAtomicAnd64(EmitContext& ctx);
+Id EmitGlobalAtomicOr64(EmitContext& ctx);
+Id EmitGlobalAtomicXor64(EmitContext& ctx);
+Id EmitGlobalAtomicExchange64(EmitContext& ctx);
+Id EmitGlobalAtomicAddF32(EmitContext& ctx);
+Id EmitGlobalAtomicAddF16x2(EmitContext& ctx);
+Id EmitGlobalAtomicAddF32x2(EmitContext& ctx);
+Id EmitGlobalAtomicMinF16x2(EmitContext& ctx);
+Id EmitGlobalAtomicMinF32x2(EmitContext& ctx);
+Id EmitGlobalAtomicMaxF16x2(EmitContext& ctx);
+Id EmitGlobalAtomicMaxF32x2(EmitContext& ctx);
+Id EmitLogicalOr(EmitContext& ctx, Id a, Id b);
+Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b);
+Id EmitLogicalXor(EmitContext& ctx, Id a, Id b);
+Id EmitLogicalNot(EmitContext& ctx, Id value);
+Id EmitConvertS16F16(EmitContext& ctx, Id value);
+Id EmitConvertS16F32(EmitContext& ctx, Id value);
+Id EmitConvertS16F64(EmitContext& ctx, Id value);
+Id EmitConvertS32F16(EmitContext& ctx, Id value);
+Id EmitConvertS32F32(EmitContext& ctx, Id value);
+Id EmitConvertS32F64(EmitContext& ctx, Id value);
+Id EmitConvertS64F16(EmitContext& ctx, Id value);
+Id EmitConvertS64F32(EmitContext& ctx, Id value);
+Id EmitConvertS64F64(EmitContext& ctx, Id value);
+Id EmitConvertU16F16(EmitContext& ctx, Id value);
+Id EmitConvertU16F32(EmitContext& ctx, Id value);
+Id EmitConvertU16F64(EmitContext& ctx, Id value);
+Id EmitConvertU32F16(EmitContext& ctx, Id value);
+Id EmitConvertU32F32(EmitContext& ctx, Id value);
+Id EmitConvertU32F64(EmitContext& ctx, Id value);
+Id EmitConvertU64F16(EmitContext& ctx, Id value);
+Id EmitConvertU64F32(EmitContext& ctx, Id value);
+Id EmitConvertU64F64(EmitContext& ctx, Id value);
+Id EmitConvertU64U32(EmitContext& ctx, Id value);
+Id EmitConvertU32U64(EmitContext& ctx, Id value);
+Id EmitConvertF16F32(EmitContext& ctx, Id value);
+Id EmitConvertF32F16(EmitContext& ctx, Id value);
+Id EmitConvertF32F64(EmitContext& ctx, Id value);
+Id EmitConvertF64F32(EmitContext& ctx, Id value);
+Id EmitConvertF16S8(EmitContext& ctx, Id value);
+Id EmitConvertF16S16(EmitContext& ctx, Id value);
+Id EmitConvertF16S32(EmitContext& ctx, Id value);
+Id EmitConvertF16S64(EmitContext& ctx, Id value);
+Id EmitConvertF16U8(EmitContext& ctx, Id value);
+Id EmitConvertF16U16(EmitContext& ctx, Id value);
+Id EmitConvertF16U32(EmitContext& ctx, Id value);
+Id EmitConvertF16U64(EmitContext& ctx, Id value);
+Id EmitConvertF32S8(EmitContext& ctx, Id value);
+Id EmitConvertF32S16(EmitContext& ctx, Id value);
+Id EmitConvertF32S32(EmitContext& ctx, Id value);
+Id EmitConvertF32S64(EmitContext& ctx, Id value);
+Id EmitConvertF32U8(EmitContext& ctx, Id value);
+Id EmitConvertF32U16(EmitContext& ctx, Id value);
+Id EmitConvertF32U32(EmitContext& ctx, Id value);
+Id EmitConvertF32U64(EmitContext& ctx, Id value);
+Id EmitConvertF64S8(EmitContext& ctx, Id value);
+Id EmitConvertF64S16(EmitContext& ctx, Id value);
+Id EmitConvertF64S32(EmitContext& ctx, Id value);
+Id EmitConvertF64S64(EmitContext& ctx, Id value);
+Id EmitConvertF64U8(EmitContext& ctx, Id value);
+Id EmitConvertF64U16(EmitContext& ctx, Id value);
+Id EmitConvertF64U32(EmitContext& ctx, Id value);
+Id EmitConvertF64U64(EmitContext& ctx, Id value);
+Id EmitBindlessImageSampleImplicitLod(EmitContext&);
+Id EmitBindlessImageSampleExplicitLod(EmitContext&);
+Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&);
+Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&);
+Id EmitBindlessImageGather(EmitContext&);
+Id EmitBindlessImageGatherDref(EmitContext&);
+Id EmitBindlessImageFetch(EmitContext&);
+Id EmitBindlessImageQueryDimensions(EmitContext&);
+Id EmitBindlessImageQueryLod(EmitContext&);
+Id EmitBindlessImageGradient(EmitContext&);
+Id EmitBindlessImageRead(EmitContext&);
+Id EmitBindlessImageWrite(EmitContext&);
+Id EmitBoundImageSampleImplicitLod(EmitContext&);
+Id EmitBoundImageSampleExplicitLod(EmitContext&);
+Id EmitBoundImageSampleDrefImplicitLod(EmitContext&);
+Id EmitBoundImageSampleDrefExplicitLod(EmitContext&);
+Id EmitBoundImageGather(EmitContext&);
+Id EmitBoundImageGatherDref(EmitContext&);
+Id EmitBoundImageFetch(EmitContext&);
+Id EmitBoundImageQueryDimensions(EmitContext&);
+Id EmitBoundImageQueryLod(EmitContext&);
+Id EmitBoundImageGradient(EmitContext&);
+Id EmitBoundImageRead(EmitContext&);
+Id EmitBoundImageWrite(EmitContext&);
+Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id bias_lc, const IR::Value& offset);
+Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id lod, const IR::Value& offset);
+Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
+ Id coords, Id dref, Id bias_lc, const IR::Value& offset);
+Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
+ Id coords, Id dref, Id lod, const IR::Value& offset);
+Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ const IR::Value& offset, const IR::Value& offset2);
+Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ const IR::Value& offset, const IR::Value& offset2, Id dref);
+Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
+ Id lod, Id ms);
+Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod);
+Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
+Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id derivates, Id offset, Id lod_clamp);
+Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
+void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color);
+Id EmitBindlessImageAtomicIAdd32(EmitContext&);
+Id EmitBindlessImageAtomicSMin32(EmitContext&);
+Id EmitBindlessImageAtomicUMin32(EmitContext&);
+Id EmitBindlessImageAtomicSMax32(EmitContext&);
+Id EmitBindlessImageAtomicUMax32(EmitContext&);
+Id EmitBindlessImageAtomicInc32(EmitContext&);
+Id EmitBindlessImageAtomicDec32(EmitContext&);
+Id EmitBindlessImageAtomicAnd32(EmitContext&);
+Id EmitBindlessImageAtomicOr32(EmitContext&);
+Id EmitBindlessImageAtomicXor32(EmitContext&);
+Id EmitBindlessImageAtomicExchange32(EmitContext&);
+Id EmitBoundImageAtomicIAdd32(EmitContext&);
+Id EmitBoundImageAtomicSMin32(EmitContext&);
+Id EmitBoundImageAtomicUMin32(EmitContext&);
+Id EmitBoundImageAtomicSMax32(EmitContext&);
+Id EmitBoundImageAtomicUMax32(EmitContext&);
+Id EmitBoundImageAtomicInc32(EmitContext&);
+Id EmitBoundImageAtomicDec32(EmitContext&);
+Id EmitBoundImageAtomicAnd32(EmitContext&);
+Id EmitBoundImageAtomicOr32(EmitContext&);
+Id EmitBoundImageAtomicXor32(EmitContext&);
+Id EmitBoundImageAtomicExchange32(EmitContext&);
+Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value);
+Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value);
+Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value);
+Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value);
+Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value);
+Id EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value);
+Id EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value);
+Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value);
+Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value);
+Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value);
+Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value);
+Id EmitLaneId(EmitContext& ctx);
+Id EmitVoteAll(EmitContext& ctx, Id pred);
+Id EmitVoteAny(EmitContext& ctx, Id pred);
+Id EmitVoteEqual(EmitContext& ctx, Id pred);
+Id EmitSubgroupBallot(EmitContext& ctx, Id pred);
+Id EmitSubgroupEqMask(EmitContext& ctx);
+Id EmitSubgroupLtMask(EmitContext& ctx);
+Id EmitSubgroupLeMask(EmitContext& ctx);
+Id EmitSubgroupGtMask(EmitContext& ctx);
+Id EmitSubgroupGeMask(EmitContext& ctx);
+Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
+ Id segmentation_mask);
+Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
+ Id segmentation_mask);
+Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
+ Id segmentation_mask);
+Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
+ Id segmentation_mask);
+Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle);
+Id EmitDPdxFine(EmitContext& ctx, Id op_a);
+Id EmitDPdyFine(EmitContext& ctx, Id op_a);
+Id EmitDPdxCoarse(EmitContext& ctx, Id op_a);
+Id EmitDPdyCoarse(EmitContext& ctx, Id op_a);
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
new file mode 100644
index 000000000..3501d7495
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
@@ -0,0 +1,270 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
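+// Zero/sign pseudo-operations attached to integer instructions are materialized from the
+// computed result and then invalidated so they are not emitted separately.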
+void SetZeroFlag(EmitContext& ctx, IR::Inst* inst, Id result) {
+ IR::Inst* const zero{inst->GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)};
+ if (!zero) {
+ return;
+ }
+ zero->SetDefinition(ctx.OpIEqual(ctx.U1, result, ctx.u32_zero_value));
+ zero->Invalidate();
+}
+
+void SetSignFlag(EmitContext& ctx, IR::Inst* inst, Id result) {
+ IR::Inst* const sign{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)};
+ if (!sign) {
+ return;
+ }
+ sign->SetDefinition(ctx.OpSLessThan(ctx.U1, result, ctx.u32_zero_value));
+ sign->Invalidate();
+}
+} // Anonymous namespace
+
+Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+ Id result{};
+ if (IR::Inst* const carry{inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp)}) {
+ const Id carry_type{ctx.TypeStruct(ctx.U32[1], ctx.U32[1])};
+ const Id carry_result{ctx.OpIAddCarry(carry_type, a, b)};
+ result = ctx.OpCompositeExtract(ctx.U32[1], carry_result, 0U);
+
+ const Id carry_value{ctx.OpCompositeExtract(ctx.U32[1], carry_result, 1U)};
+ carry->SetDefinition(ctx.OpINotEqual(ctx.U1, carry_value, ctx.u32_zero_value));
+ carry->Invalidate();
+ } else {
+ result = ctx.OpIAdd(ctx.U32[1], a, b);
+ }
+ SetZeroFlag(ctx, inst, result);
+ SetSignFlag(ctx, inst, result);
+ if (IR::Inst * overflow{inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)}) {
+ // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c
+ constexpr u32 s32_max{static_cast<u32>(std::numeric_limits<s32>::max())};
+ const Id is_positive{ctx.OpSGreaterThanEqual(ctx.U1, a, ctx.u32_zero_value)};
+ const Id sub_a{ctx.OpISub(ctx.U32[1], ctx.Const(s32_max), a)};
+
+ const Id positive_test{ctx.OpSGreaterThan(ctx.U1, b, sub_a)};
+ const Id negative_test{ctx.OpSLessThan(ctx.U1, b, sub_a)};
+ const Id carry_flag{ctx.OpSelect(ctx.U1, is_positive, positive_test, negative_test)};
+ overflow->SetDefinition(carry_flag);
+ overflow->Invalidate();
+ }
+ return result;
+}
+
+Id EmitIAdd64(EmitContext& ctx, Id a, Id b) {
+ return ctx.OpIAdd(ctx.U64, a, b);
+}
+
+Id EmitISub32(EmitContext& ctx, Id a, Id b) {
+ return ctx.OpISub(ctx.U32[1], a, b);
+}
+
+Id EmitISub64(EmitContext& ctx, Id a, Id b) {
+ return ctx.OpISub(ctx.U64, a, b);
+}
+
+Id EmitIMul32(EmitContext& ctx, Id a, Id b) {
+ return ctx.OpIMul(ctx.U32[1], a, b);
+}
+
+Id EmitINeg32(EmitContext& ctx, Id value) {
+ return ctx.OpSNegate(ctx.U32[1], value);
+}
+
+Id EmitINeg64(EmitContext& ctx, Id value) {
+ return ctx.OpSNegate(ctx.U64, value);
+}
+
+Id EmitIAbs32(EmitContext& ctx, Id value) {
+ return ctx.OpSAbs(ctx.U32[1], value);
+}
+
+Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) {
+ return ctx.OpShiftLeftLogical(ctx.U32[1], base, shift);
+}
+
+Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift) {
+ return ctx.OpShiftLeftLogical(ctx.U64, base, shift);
+}
+
+Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift) {
+ return ctx.OpShiftRightLogical(ctx.U32[1], base, shift);
+}
+
+Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift) {
+ return ctx.OpShiftRightLogical(ctx.U64, base, shift);
+}
+
+Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift) {
+ return ctx.OpShiftRightArithmetic(ctx.U32[1], base, shift);
+}
+
+Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift) {
+ return ctx.OpShiftRightArithmetic(ctx.U64, base, shift);
+}
+
+Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+ const Id result{ctx.OpBitwiseAnd(ctx.U32[1], a, b)};
+ SetZeroFlag(ctx, inst, result);
+ SetSignFlag(ctx, inst, result);
+ return result;
+}
+
+Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+ const Id result{ctx.OpBitwiseOr(ctx.U32[1], a, b)};
+ SetZeroFlag(ctx, inst, result);
+ SetSignFlag(ctx, inst, result);
+ return result;
+}
+
+Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+ const Id result{ctx.OpBitwiseXor(ctx.U32[1], a, b)};
+ SetZeroFlag(ctx, inst, result);
+ SetSignFlag(ctx, inst, result);
+ return result;
+}
+
+Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count) {
+ return ctx.OpBitFieldInsert(ctx.U32[1], base, insert, offset, count);
+}
+
+Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count) {
+ const Id result{ctx.OpBitFieldSExtract(ctx.U32[1], base, offset, count)};
+ SetZeroFlag(ctx, inst, result);
+ SetSignFlag(ctx, inst, result);
+ return result;
+}
+
+Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count) {
+ const Id result{ctx.OpBitFieldUExtract(ctx.U32[1], base, offset, count)};
+ SetZeroFlag(ctx, inst, result);
+ SetSignFlag(ctx, inst, result);
+ return result;
+}
+
+Id EmitBitReverse32(EmitContext& ctx, Id value) {
+ return ctx.OpBitReverse(ctx.U32[1], value);
+}
+
+Id EmitBitCount32(EmitContext& ctx, Id value) {
+ return ctx.OpBitCount(ctx.U32[1], value);
+}
+
+Id EmitBitwiseNot32(EmitContext& ctx, Id value) {
+ return ctx.OpNot(ctx.U32[1], value);
+}
+
+Id EmitFindSMsb32(EmitContext& ctx, Id value) {
+ return ctx.OpFindSMsb(ctx.U32[1], value);
+}
+
+Id EmitFindUMsb32(EmitContext& ctx, Id value) {
+ return ctx.OpFindUMsb(ctx.U32[1], value);
+}
+
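+// Profiles with has_broken_signed_operations need the operands of signed min/max bitcast to
+// signed types before the operation; the result is bitcast back so surrounding code keeps
+// unsigned typing.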
+Id EmitSMin32(EmitContext& ctx, Id a, Id b) {
+ const bool is_broken{ctx.profile.has_broken_signed_operations};
+ if (is_broken) {
+ a = ctx.OpBitcast(ctx.S32[1], a);
+ b = ctx.OpBitcast(ctx.S32[1], b);
+ }
+ const Id result{ctx.OpSMin(ctx.U32[1], a, b)};
+ return is_broken ? ctx.OpBitcast(ctx.U32[1], result) : result;
+}
+
+Id EmitUMin32(EmitContext& ctx, Id a, Id b) {
+ return ctx.OpUMin(ctx.U32[1], a, b);
+}
+
+Id EmitSMax32(EmitContext& ctx, Id a, Id b) {
+ const bool is_broken{ctx.profile.has_broken_signed_operations};
+ if (is_broken) {
+ a = ctx.OpBitcast(ctx.S32[1], a);
+ b = ctx.OpBitcast(ctx.S32[1], b);
+ }
+ const Id result{ctx.OpSMax(ctx.U32[1], a, b)};
+ return is_broken ? ctx.OpBitcast(ctx.U32[1], result) : result;
+}
+
+Id EmitUMax32(EmitContext& ctx, Id a, Id b) {
+ return ctx.OpUMax(ctx.U32[1], a, b);
+}
+
+Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max) {
+ Id result{};
+ if (ctx.profile.has_broken_signed_operations || ctx.profile.has_broken_spirv_clamp) {
+ value = ctx.OpBitcast(ctx.S32[1], value);
+ min = ctx.OpBitcast(ctx.S32[1], min);
+ max = ctx.OpBitcast(ctx.S32[1], max);
+ if (ctx.profile.has_broken_spirv_clamp) {
+ result = ctx.OpSMax(ctx.S32[1], ctx.OpSMin(ctx.S32[1], value, max), min);
+ } else {
+ result = ctx.OpSClamp(ctx.S32[1], value, min, max);
+ }
+ result = ctx.OpBitcast(ctx.U32[1], result);
+ } else {
+ result = ctx.OpSClamp(ctx.U32[1], value, min, max);
+ }
+ SetZeroFlag(ctx, inst, result);
+ SetSignFlag(ctx, inst, result);
+ return result;
+}
+
+Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max) {
+ Id result{};
+ if (ctx.profile.has_broken_spirv_clamp) {
+ result = ctx.OpUMax(ctx.U32[1], ctx.OpUMin(ctx.U32[1], value, max), min);
+ } else {
+ result = ctx.OpUClamp(ctx.U32[1], value, min, max);
+ }
+ SetZeroFlag(ctx, inst, result);
+ SetSignFlag(ctx, inst, result);
+ return result;
+}
+
+Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpSLessThan(ctx.U1, lhs, rhs);
+}
+
+Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpULessThan(ctx.U1, lhs, rhs);
+}
+
+Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpIEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpSLessThanEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpULessThanEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpSGreaterThan(ctx.U1, lhs, rhs);
+}
+
+Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpUGreaterThan(ctx.U1, lhs, rhs);
+}
+
+Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpINotEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpSGreaterThanEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpUGreaterThanEqual(ctx.U1, lhs, rhs);
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp
new file mode 100644
index 000000000..b9a9500fc
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp
@@ -0,0 +1,26 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+
+namespace Shader::Backend::SPIRV {
+
+Id EmitLogicalOr(EmitContext& ctx, Id a, Id b) {
+ return ctx.OpLogicalOr(ctx.U1, a, b);
+}
+
+Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b) {
+ return ctx.OpLogicalAnd(ctx.U1, a, b);
+}
+
+Id EmitLogicalXor(EmitContext& ctx, Id a, Id b) {
+ return ctx.OpLogicalNotEqual(ctx.U1, a, b);
+}
+
+Id EmitLogicalNot(EmitContext& ctx, Id value) {
+ return ctx.OpLogicalNot(ctx.U1, value);
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp
new file mode 100644
index 000000000..679ee2684
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp
@@ -0,0 +1,275 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <bit>
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
+Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size,
+ u32 index_offset = 0) {
+ if (offset.IsImmediate()) {
+ const u32 imm_offset{static_cast<u32>(offset.U32() / element_size) + index_offset};
+ return ctx.Const(imm_offset);
+ }
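+    // element_size is a power of two, so the division is performed as a right shift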
+ const u32 shift{static_cast<u32>(std::countr_zero(element_size))};
+ Id index{ctx.Def(offset)};
+ if (shift != 0) {
+ const Id shift_id{ctx.Const(shift)};
+ index = ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id);
+ }
+ if (index_offset != 0) {
+ index = ctx.OpIAdd(ctx.U32[1], index, ctx.Const(index_offset));
+ }
+ return index;
+}
+
+Id StoragePointer(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ const StorageTypeDefinition& type_def, size_t element_size,
+ Id StorageDefinitions::*member_ptr, u32 index_offset = 0) {
+ if (!binding.IsImmediate()) {
+ throw NotImplementedException("Dynamic storage buffer indexing");
+ }
+ const Id ssbo{ctx.ssbos[binding.U32()].*member_ptr};
+ const Id index{StorageIndex(ctx, offset, element_size, index_offset)};
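+    // The SSBO is a struct with a single runtime array member, hence the leading zero index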
+ return ctx.OpAccessChain(type_def.element, ssbo, ctx.u32_zero_value, index);
+}
+
+Id LoadStorage(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id result_type,
+ const StorageTypeDefinition& type_def, size_t element_size,
+ Id StorageDefinitions::*member_ptr, u32 index_offset = 0) {
+ const Id pointer{
+ StoragePointer(ctx, binding, offset, type_def, element_size, member_ptr, index_offset)};
+ return ctx.OpLoad(result_type, pointer);
+}
+
+Id LoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ u32 index_offset = 0) {
+ return LoadStorage(ctx, binding, offset, ctx.U32[1], ctx.storage_types.U32, sizeof(u32),
+ &StorageDefinitions::U32, index_offset);
+}
+
+void WriteStorage(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value,
+ const StorageTypeDefinition& type_def, size_t element_size,
+ Id StorageDefinitions::*member_ptr, u32 index_offset = 0) {
+ const Id pointer{
+ StoragePointer(ctx, binding, offset, type_def, element_size, member_ptr, index_offset)};
+ ctx.OpStore(pointer, value);
+}
+
+void WriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value,
+ u32 index_offset = 0) {
+ WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32, sizeof(u32),
+ &StorageDefinitions::U32, index_offset);
+}
+} // Anonymous namespace
+
+void EmitLoadGlobalU8(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitLoadGlobalS8(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitLoadGlobalU16(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitLoadGlobalS16(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitLoadGlobal32(EmitContext& ctx, Id address) {
+ if (ctx.profile.support_int64) {
+ return ctx.OpFunctionCall(ctx.U32[1], ctx.load_global_func_u32, address);
+ }
+ LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation");
+ return ctx.Const(0u);
+}
+
+Id EmitLoadGlobal64(EmitContext& ctx, Id address) {
+ if (ctx.profile.support_int64) {
+ return ctx.OpFunctionCall(ctx.U32[2], ctx.load_global_func_u32x2, address);
+ }
+ LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation");
+ return ctx.Const(0u, 0u);
+}
+
+Id EmitLoadGlobal128(EmitContext& ctx, Id address) {
+ if (ctx.profile.support_int64) {
+ return ctx.OpFunctionCall(ctx.U32[4], ctx.load_global_func_u32x4, address);
+ }
+ LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation");
+ return ctx.Const(0u, 0u, 0u, 0u);
+}
+
+void EmitWriteGlobalU8(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitWriteGlobalS8(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitWriteGlobalU16(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitWriteGlobalS16(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value) {
+ if (ctx.profile.support_int64) {
+ ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32, address, value);
+ return;
+ }
+ LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation");
+}
+
+void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value) {
+ if (ctx.profile.support_int64) {
+ ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x2, address, value);
+ return;
+ }
+ LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation");
+}
+
+void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value) {
+ if (ctx.profile.support_int64) {
+ ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x4, address, value);
+ return;
+ }
+ LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation");
+}
+
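+// 8-bit and 16-bit loads use native small storage types when available; otherwise the
+// containing 32-bit word is loaded and the field is extracted with a bitfield operation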
+Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+ if (ctx.profile.support_int8 && ctx.profile.support_descriptor_aliasing) {
+ return ctx.OpUConvert(ctx.U32[1],
+ LoadStorage(ctx, binding, offset, ctx.U8, ctx.storage_types.U8,
+ sizeof(u8), &StorageDefinitions::U8));
+ } else {
+ return ctx.OpBitFieldUExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset),
+ ctx.BitOffset8(offset), ctx.Const(8u));
+ }
+}
+
+Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+ if (ctx.profile.support_int8 && ctx.profile.support_descriptor_aliasing) {
+ return ctx.OpSConvert(ctx.U32[1],
+ LoadStorage(ctx, binding, offset, ctx.S8, ctx.storage_types.S8,
+ sizeof(s8), &StorageDefinitions::S8));
+ } else {
+ return ctx.OpBitFieldSExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset),
+ ctx.BitOffset8(offset), ctx.Const(8u));
+ }
+}
+
+Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+ if (ctx.profile.support_int16 && ctx.profile.support_descriptor_aliasing) {
+ return ctx.OpUConvert(ctx.U32[1],
+ LoadStorage(ctx, binding, offset, ctx.U16, ctx.storage_types.U16,
+ sizeof(u16), &StorageDefinitions::U16));
+ } else {
+ return ctx.OpBitFieldUExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset),
+ ctx.BitOffset16(offset), ctx.Const(16u));
+ }
+}
+
+Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+ if (ctx.profile.support_int16 && ctx.profile.support_descriptor_aliasing) {
+ return ctx.OpSConvert(ctx.U32[1],
+ LoadStorage(ctx, binding, offset, ctx.S16, ctx.storage_types.S16,
+ sizeof(s16), &StorageDefinitions::S16));
+ } else {
+ return ctx.OpBitFieldSExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset),
+ ctx.BitOffset16(offset), ctx.Const(16u));
+ }
+}
+
+Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+ return LoadStorage32(ctx, binding, offset);
+}
+
+Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+ if (ctx.profile.support_descriptor_aliasing) {
+ return LoadStorage(ctx, binding, offset, ctx.U32[2], ctx.storage_types.U32x2,
+ sizeof(u32[2]), &StorageDefinitions::U32x2);
+ } else {
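+        // Without descriptor aliasing, compose the result from two consecutive 32-bit loads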
+ return ctx.OpCompositeConstruct(ctx.U32[2], LoadStorage32(ctx, binding, offset, 0),
+ LoadStorage32(ctx, binding, offset, 1));
+ }
+}
+
+Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+ if (ctx.profile.support_descriptor_aliasing) {
+ return LoadStorage(ctx, binding, offset, ctx.U32[4], ctx.storage_types.U32x4,
+ sizeof(u32[4]), &StorageDefinitions::U32x4);
+ } else {
+ return ctx.OpCompositeConstruct(ctx.U32[4], LoadStorage32(ctx, binding, offset, 0),
+ LoadStorage32(ctx, binding, offset, 1),
+ LoadStorage32(ctx, binding, offset, 2),
+ LoadStorage32(ctx, binding, offset, 3));
+ }
+}
+
+void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U8, value), ctx.storage_types.U8,
+ sizeof(u8), &StorageDefinitions::U8);
+}
+
+void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S8, value), ctx.storage_types.S8,
+ sizeof(s8), &StorageDefinitions::S8);
+}
+
+void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U16, value), ctx.storage_types.U16,
+ sizeof(u16), &StorageDefinitions::U16);
+}
+
+void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S16, value), ctx.storage_types.S16,
+ sizeof(s16), &StorageDefinitions::S16);
+}
+
+void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ WriteStorage32(ctx, binding, offset, value);
+}
+
+void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ if (ctx.profile.support_descriptor_aliasing) {
+ WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32x2, sizeof(u32[2]),
+ &StorageDefinitions::U32x2);
+ } else {
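+        // Without descriptor aliasing, split the value into two consecutive 32-bit stores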
+ for (u32 index = 0; index < 2; ++index) {
+ const Id element{ctx.OpCompositeExtract(ctx.U32[1], value, index)};
+ WriteStorage32(ctx, binding, offset, element, index);
+ }
+ }
+}
+
+void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ if (ctx.profile.support_descriptor_aliasing) {
+ WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32x4, sizeof(u32[4]),
+ &StorageDefinitions::U32x4);
+ } else {
+ for (u32 index = 0; index < 4; ++index) {
+ const Id element{ctx.OpCompositeExtract(ctx.U32[1], value, index)};
+ WriteStorage32(ctx, binding, offset, element, index);
+ }
+ }
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
new file mode 100644
index 000000000..c5b4f4720
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
@@ -0,0 +1,42 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+
+namespace Shader::Backend::SPIRV {
+
+Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
+ return ctx.OpSelect(ctx.U1, cond, true_value, false_value);
+}
+
+Id EmitSelectU8(EmitContext&, Id, Id, Id) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
+ return ctx.OpSelect(ctx.U16, cond, true_value, false_value);
+}
+
+Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
+ return ctx.OpSelect(ctx.U32[1], cond, true_value, false_value);
+}
+
+Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
+ return ctx.OpSelect(ctx.U64, cond, true_value, false_value);
+}
+
+Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
+ return ctx.OpSelect(ctx.F16[1], cond, true_value, false_value);
+}
+
+Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
+ return ctx.OpSelect(ctx.F32[1], cond, true_value, false_value);
+}
+
+Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
+ return ctx.OpSelect(ctx.F64[1], cond, true_value, false_value);
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
new file mode 100644
index 000000000..9a79fc7a2
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
@@ -0,0 +1,174 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
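+// Returns a typed pointer into 'array' for the element containing byte 'offset'; 'shift' is log2 of the element size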
+Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) {
+ const Id shift_id{ctx.Const(shift)};
+ const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+ return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index);
+}
+
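+// Loads the aligned 32-bit shared memory word that contains the byte at 'offset'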
+Id Word(EmitContext& ctx, Id offset) {
+ const Id shift_id{ctx.Const(2U)};
+ const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+ const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
+ return ctx.OpLoad(ctx.U32[1], pointer);
+}
+
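+// Computes the bit position of the addressed field within its 32-bit word (offset * 8, masked) and the field width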
+std::pair<Id, Id> ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count) {
+ const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.Const(3U))};
+ const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Const(mask))};
+ const Id count_id{ctx.Const(count)};
+ return {bit, count_id};
+}
+} // Anonymous namespace
+
+Id EmitLoadSharedU8(EmitContext& ctx, Id offset) {
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ const Id pointer{
+ ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
+ return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
+ } else {
+ const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
+ return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
+ }
+}
+
+Id EmitLoadSharedS8(EmitContext& ctx, Id offset) {
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ const Id pointer{
+ ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
+ return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
+ } else {
+ const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
+ return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
+ }
+}
+
+Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
+ return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
+ } else {
+ const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
+ return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
+ }
+}
+
+Id EmitLoadSharedS16(EmitContext& ctx, Id offset) {
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
+ return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
+ } else {
+ const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
+ return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
+ }
+}
+
+Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ const Id pointer{Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2)};
+ return ctx.OpLoad(ctx.U32[1], pointer);
+ } else {
+ return Word(ctx, offset);
+ }
+}
+
+Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
+ return ctx.OpLoad(ctx.U32[2], pointer);
+ } else {
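+        // Load two consecutive 32-bit words and compose the 64-bit result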
+ const Id shift_id{ctx.Const(2U)};
+ const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+ const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(1U))};
+ const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
+ const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
+ return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
+ ctx.OpLoad(ctx.U32[1], rhs_pointer));
+ }
+}
+
+Id EmitLoadSharedU128(EmitContext& ctx, Id offset) {
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
+ return ctx.OpLoad(ctx.U32[4], pointer);
+ }
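+    // Load four consecutive 32-bit words and compose the result vector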
+ const Id shift_id{ctx.Const(2U)};
+ const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+ std::array<Id, 4> values{};
+ for (u32 i = 0; i < 4; ++i) {
+ const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(i))};
+ const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
+ values[i] = ctx.OpLoad(ctx.U32[1], pointer);
+ }
+ return ctx.OpCompositeConstruct(ctx.U32[4], values);
+}
+
+void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) {
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ const Id pointer{
+ ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
+ ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value));
+ } else {
+ ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u8_func, offset, value);
+ }
+}
+
+void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) {
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
+ ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value));
+ } else {
+ ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u16_func, offset, value);
+ }
+}
+
+void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) {
+ Id pointer{};
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ pointer = Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2);
+ } else {
+ const Id shift{ctx.Const(2U)};
+ const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
+ pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset);
+ }
+ ctx.OpStore(pointer, value);
+}
+
+void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
+ ctx.OpStore(pointer, value);
+ return;
+ }
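+    // Split the 64-bit value into two consecutive 32-bit shared memory words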
+ const Id shift{ctx.Const(2U)};
+ const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
+ const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.Const(1U))};
+ const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)};
+ const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)};
+ ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
+ ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
+}
+
+void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) {
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
+ ctx.OpStore(pointer, value);
+ return;
+ }
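+    // Store each of the four 32-bit components to consecutive shared memory words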
+ const Id shift{ctx.Const(2U)};
+ const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
+ for (u32 i = 0; i < 4; ++i) {
+ const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(i))};
+ const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
+ ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i));
+ }
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
new file mode 100644
index 000000000..9e7eb3cb1
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
@@ -0,0 +1,150 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
+void ConvertDepthMode(EmitContext& ctx) {
+ const Id type{ctx.F32[1]};
+ const Id position{ctx.OpLoad(ctx.F32[4], ctx.output_position)};
+ const Id z{ctx.OpCompositeExtract(type, position, 2u)};
+ const Id w{ctx.OpCompositeExtract(type, position, 3u)};
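+    // Map the [-w, w] clip-space depth range onto [0, w]: z' = (z + w) * 0.5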
+ const Id screen_depth{ctx.OpFMul(type, ctx.OpFAdd(type, z, w), ctx.Constant(type, 0.5f))};
+ const Id vector{ctx.OpCompositeInsert(ctx.F32[4], screen_depth, position, 2u)};
+ ctx.OpStore(ctx.output_position, vector);
+}
+
+void SetFixedPipelinePointSize(EmitContext& ctx) {
+ if (ctx.runtime_info.fixed_state_point_size) {
+ const float point_size{*ctx.runtime_info.fixed_state_point_size};
+ ctx.OpStore(ctx.output_point_size, ctx.Const(point_size));
+ }
+}
+
+Id DefaultVarying(EmitContext& ctx, u32 num_components, u32 element, Id zero, Id one,
+ Id default_vector) {
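+    // Unwritten components default to 0, except the W component which defaults to 1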
+ switch (num_components) {
+ case 1:
+ return element == 3 ? one : zero;
+ case 2:
+ return ctx.ConstantComposite(ctx.F32[2], zero, element + 1 == 3 ? one : zero);
+ case 3:
+ return ctx.ConstantComposite(ctx.F32[3], zero, zero, element + 2 == 3 ? one : zero);
+ case 4:
+ return default_vector;
+ }
+ throw InvalidArgument("Bad element");
+}
+
+Id ComparisonFunction(EmitContext& ctx, CompareFunction comparison, Id operand_1, Id operand_2) {
+ switch (comparison) {
+ case CompareFunction::Never:
+ return ctx.false_value;
+ case CompareFunction::Less:
+ return ctx.OpFOrdLessThan(ctx.U1, operand_1, operand_2);
+ case CompareFunction::Equal:
+ return ctx.OpFOrdEqual(ctx.U1, operand_1, operand_2);
+ case CompareFunction::LessThanEqual:
+ return ctx.OpFOrdLessThanEqual(ctx.U1, operand_1, operand_2);
+ case CompareFunction::Greater:
+ return ctx.OpFOrdGreaterThan(ctx.U1, operand_1, operand_2);
+ case CompareFunction::NotEqual:
+ return ctx.OpFOrdNotEqual(ctx.U1, operand_1, operand_2);
+ case CompareFunction::GreaterThanEqual:
+ return ctx.OpFOrdGreaterThanEqual(ctx.U1, operand_1, operand_2);
+ case CompareFunction::Always:
+ return ctx.true_value;
+ }
+ throw InvalidArgument("Comparison function {}", comparison);
+}
+
+void AlphaTest(EmitContext& ctx) {
+ if (!ctx.runtime_info.alpha_test_func) {
+ return;
+ }
+ const auto comparison{*ctx.runtime_info.alpha_test_func};
+ if (comparison == CompareFunction::Always) {
+ return;
+ }
+ if (!Sirit::ValidId(ctx.frag_color[0])) {
+ return;
+ }
+
+ const Id type{ctx.F32[1]};
+ const Id rt0_color{ctx.OpLoad(ctx.F32[4], ctx.frag_color[0])};
+ const Id alpha{ctx.OpCompositeExtract(type, rt0_color, 3u)};
+
+ const Id true_label{ctx.OpLabel()};
+ const Id discard_label{ctx.OpLabel()};
+ const Id alpha_reference{ctx.Const(ctx.runtime_info.alpha_test_reference)};
+ const Id condition{ComparisonFunction(ctx, comparison, alpha, alpha_reference)};
+
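+    // Kill the fragment when the alpha test comparison fails; otherwise fall through to the merge block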
+ ctx.OpSelectionMerge(true_label, spv::SelectionControlMask::MaskNone);
+ ctx.OpBranchConditional(condition, true_label, discard_label);
+ ctx.AddLabel(discard_label);
+ ctx.OpKill();
+ ctx.AddLabel(true_label);
+}
+} // Anonymous namespace
+
+void EmitPrologue(EmitContext& ctx) {
+ if (ctx.stage == Stage::VertexB) {
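+        // Initialize the position and all used generic outputs to the default (0, 0, 0, 1)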
+ const Id zero{ctx.Const(0.0f)};
+ const Id one{ctx.Const(1.0f)};
+ const Id default_vector{ctx.ConstantComposite(ctx.F32[4], zero, zero, zero, one)};
+ ctx.OpStore(ctx.output_position, default_vector);
+ for (const auto& info : ctx.output_generics) {
+ if (info[0].num_components == 0) {
+ continue;
+ }
+ u32 element{0};
+ while (element < 4) {
+ const auto& element_info{info[element]};
+ const u32 num{element_info.num_components};
+ const Id value{DefaultVarying(ctx, num, element, zero, one, default_vector)};
+ ctx.OpStore(element_info.id, value);
+ element += num;
+ }
+ }
+ }
+ if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) {
+ SetFixedPipelinePointSize(ctx);
+ }
+}
+
+void EmitEpilogue(EmitContext& ctx) {
+ if (ctx.stage == Stage::VertexB && ctx.runtime_info.convert_depth_mode) {
+ ConvertDepthMode(ctx);
+ }
+ if (ctx.stage == Stage::Fragment) {
+ AlphaTest(ctx);
+ }
+}
+
+void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) {
+ if (ctx.runtime_info.convert_depth_mode) {
+ ConvertDepthMode(ctx);
+ }
+ if (stream.IsImmediate()) {
+ ctx.OpEmitStreamVertex(ctx.Def(stream));
+ } else {
+ LOG_WARNING(Shader_SPIRV, "Stream is not immediate");
+ ctx.OpEmitStreamVertex(ctx.u32_zero_value);
+ }
+ // Restore fixed pipeline point size after emitting the vertex
+ SetFixedPipelinePointSize(ctx);
+}
+
+void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) {
+ if (stream.IsImmediate()) {
+ ctx.OpEndStreamPrimitive(ctx.Def(stream));
+ } else {
+ LOG_WARNING(Shader_SPIRV, "Stream is not immediate");
+ ctx.OpEndStreamPrimitive(ctx.u32_zero_value);
+ }
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp
new file mode 100644
index 000000000..c9f469e90
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp
@@ -0,0 +1,30 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+
+namespace Shader::Backend::SPIRV {
+
+Id EmitUndefU1(EmitContext& ctx) {
+ return ctx.OpUndef(ctx.U1);
+}
+
+Id EmitUndefU8(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitUndefU16(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitUndefU32(EmitContext& ctx) {
+ return ctx.OpUndef(ctx.U32[1]);
+}
+
+Id EmitUndefU64(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
new file mode 100644
index 000000000..78b1e1ba7
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
@@ -0,0 +1,203 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
+Id WarpExtract(EmitContext& ctx, Id value) {
+    const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+    // 'value' is a u32x4 ballot/mask; select the 32-bit word covering this invocation
+    const Id element_index{ctx.OpShiftRightLogical(ctx.U32[1], local_index, ctx.Const(5U))};
+    return ctx.OpVectorExtractDynamic(ctx.U32[1], value, element_index);
+}
+
+Id LoadMask(EmitContext& ctx, Id mask) {
+ const Id value{ctx.OpLoad(ctx.U32[4], mask)};
+ if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+ return ctx.OpCompositeExtract(ctx.U32[1], value, 0U);
+ }
+ return WarpExtract(ctx, value);
+}
+
+void SetInBoundsFlag(IR::Inst* inst, Id result) {
+ IR::Inst* const in_bounds{inst->GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)};
+ if (!in_bounds) {
+ return;
+ }
+ in_bounds->SetDefinition(result);
+ in_bounds->Invalidate();
+}
+
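+// Shuffle helpers: the segmentation mask splits the warp into segments and, together with
+// 'clamp', bounds the source lane that a shuffle is allowed to read from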
+Id ComputeMinThreadId(EmitContext& ctx, Id thread_id, Id segmentation_mask) {
+ return ctx.OpBitwiseAnd(ctx.U32[1], thread_id, segmentation_mask);
+}
+
+Id ComputeMaxThreadId(EmitContext& ctx, Id min_thread_id, Id clamp, Id not_seg_mask) {
+ return ctx.OpBitwiseOr(ctx.U32[1], min_thread_id,
+ ctx.OpBitwiseAnd(ctx.U32[1], clamp, not_seg_mask));
+}
+
+Id GetMaxThreadId(EmitContext& ctx, Id thread_id, Id clamp, Id segmentation_mask) {
+ const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
+ const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
+ return ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask);
+}
+
+Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) {
+ return ctx.OpSelect(ctx.U32[1], in_range,
+ ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value);
+}
+} // Anonymous namespace
+
+Id EmitLaneId(EmitContext& ctx) {
+ const Id id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+ if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+ return id;
+ }
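+    // Guest warps are 32 threads wide; mask to the lane index within the guest warp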
+ return ctx.OpBitwiseAnd(ctx.U32[1], id, ctx.Const(31U));
+}
+
+Id EmitVoteAll(EmitContext& ctx, Id pred) {
+ if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+ return ctx.OpSubgroupAllKHR(ctx.U1, pred);
+ }
+ const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
+ const Id active_mask{WarpExtract(ctx, mask_ballot)};
+ const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
+ const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)};
+ return ctx.OpIEqual(ctx.U1, lhs, active_mask);
+}
+
+Id EmitVoteAny(EmitContext& ctx, Id pred) {
+ if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+ return ctx.OpSubgroupAnyKHR(ctx.U1, pred);
+ }
+ const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
+ const Id active_mask{WarpExtract(ctx, mask_ballot)};
+ const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
+ const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)};
+ return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value);
+}
+
+Id EmitVoteEqual(EmitContext& ctx, Id pred) {
+ if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+ return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred);
+ }
+ const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
+ const Id active_mask{WarpExtract(ctx, mask_ballot)};
+ const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
+ const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)};
+ return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value),
+ ctx.OpIEqual(ctx.U1, lhs, active_mask));
+}
+
+Id EmitSubgroupBallot(EmitContext& ctx, Id pred) {
+ const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)};
+ if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+ return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U);
+ }
+ return WarpExtract(ctx, ballot);
+}
+
+Id EmitSubgroupEqMask(EmitContext& ctx) {
+ return LoadMask(ctx, ctx.subgroup_mask_eq);
+}
+
+Id EmitSubgroupLtMask(EmitContext& ctx) {
+ return LoadMask(ctx, ctx.subgroup_mask_lt);
+}
+
+Id EmitSubgroupLeMask(EmitContext& ctx) {
+ return LoadMask(ctx, ctx.subgroup_mask_le);
+}
+
+Id EmitSubgroupGtMask(EmitContext& ctx) {
+ return LoadMask(ctx, ctx.subgroup_mask_gt);
+}
+
+Id EmitSubgroupGeMask(EmitContext& ctx) {
+ return LoadMask(ctx, ctx.subgroup_mask_ge);
+}
+
+Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
+ Id segmentation_mask) {
+ const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
+ const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+ const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
+ const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)};
+
+ const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)};
+ const Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)};
+ const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
+
+ SetInBoundsFlag(inst, in_range);
+ return SelectValue(ctx, in_range, value, src_thread_id);
+}
+
+Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
+ Id segmentation_mask) {
+ const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+ const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
+ const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
+ const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)};
+
+ SetInBoundsFlag(inst, in_range);
+ return SelectValue(ctx, in_range, value, src_thread_id);
+}
+
+Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
+ Id segmentation_mask) {
+ const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+ const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
+ const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
+ const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
+
+ SetInBoundsFlag(inst, in_range);
+ return SelectValue(ctx, in_range, value, src_thread_id);
+}
+
+Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
+ Id segmentation_mask) {
+ const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+ const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
+ const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
+ const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
+
+ SetInBoundsFlag(inst, in_range);
+ return SelectValue(ctx, in_range, value, src_thread_id);
+}
+
+Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle) {
+ const Id three{ctx.Const(3U)};
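+    // Select this lane's 2-bit swizzle code: (swizzle >> ((lane_id & 3) * 2)) & 3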
+ Id mask{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+ mask = ctx.OpBitwiseAnd(ctx.U32[1], mask, three);
+ mask = ctx.OpShiftLeftLogical(ctx.U32[1], mask, ctx.Const(1U));
+ mask = ctx.OpShiftRightLogical(ctx.U32[1], swizzle, mask);
+ mask = ctx.OpBitwiseAnd(ctx.U32[1], mask, three);
+
+ const Id modifier_a{ctx.OpVectorExtractDynamic(ctx.F32[1], ctx.fswzadd_lut_a, mask)};
+ const Id modifier_b{ctx.OpVectorExtractDynamic(ctx.F32[1], ctx.fswzadd_lut_b, mask)};
+
+ const Id result_a{ctx.OpFMul(ctx.F32[1], op_a, modifier_a)};
+ const Id result_b{ctx.OpFMul(ctx.F32[1], op_b, modifier_b)};
+ return ctx.OpFAdd(ctx.F32[1], result_a, result_b);
+}
+
+Id EmitDPdxFine(EmitContext& ctx, Id op_a) {
+ return ctx.OpDPdxFine(ctx.F32[1], op_a);
+}
+
+Id EmitDPdyFine(EmitContext& ctx, Id op_a) {
+ return ctx.OpDPdyFine(ctx.F32[1], op_a);
+}
+
+Id EmitDPdxCoarse(EmitContext& ctx, Id op_a) {
+ return ctx.OpDPdxCoarse(ctx.F32[1], op_a);
+}
+
+Id EmitDPdyCoarse(EmitContext& ctx, Id op_a) {
+ return ctx.OpDPdyCoarse(ctx.F32[1], op_a);
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h
new file mode 100644
index 000000000..8369d0d84
--- /dev/null
+++ b/src/shader_recompiler/environment.h
@@ -0,0 +1,53 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+
+#include "common/common_types.h"
+#include "shader_recompiler/program_header.h"
+#include "shader_recompiler/shader_info.h"
+#include "shader_recompiler/stage.h"
+
+namespace Shader {
+
+class Environment {
+public:
+ virtual ~Environment() = default;
+
+ [[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0;
+
+ [[nodiscard]] virtual u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) = 0;
+
+ [[nodiscard]] virtual TextureType ReadTextureType(u32 raw_handle) = 0;
+
+ [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0;
+
+ [[nodiscard]] virtual u32 LocalMemorySize() const = 0;
+
+ [[nodiscard]] virtual u32 SharedMemorySize() const = 0;
+
+ [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0;
+
+ [[nodiscard]] const ProgramHeader& SPH() const noexcept {
+ return sph;
+ }
+
+ [[nodiscard]] const std::array<u32, 8>& GpPassthroughMask() const noexcept {
+ return gp_passthrough_mask;
+ }
+
+ [[nodiscard]] Stage ShaderStage() const noexcept {
+ return stage;
+ }
+
+ [[nodiscard]] u32 StartAddress() const noexcept {
+ return start_address;
+ }
+
+protected:
+ ProgramHeader sph{};
+ std::array<u32, 8> gp_passthrough_mask{};
+ Stage stage{};
+ u32 start_address{};
+};
+
+} // namespace Shader
diff --git a/src/shader_recompiler/exception.h b/src/shader_recompiler/exception.h
new file mode 100644
index 000000000..337e7f0c8
--- /dev/null
+++ b/src/shader_recompiler/exception.h
@@ -0,0 +1,66 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <stdexcept>
+#include <string>
+#include <string_view>
+#include <utility>
+
+#include <fmt/format.h>
+
+namespace Shader {
+
+class Exception : public std::exception {
+public:
+ explicit Exception(std::string message) noexcept : err_message{std::move(message)} {}
+
+ const char* what() const noexcept override {
+ return err_message.c_str();
+ }
+
+ void Prepend(std::string_view prepend) {
+ err_message.insert(0, prepend);
+ }
+
+ void Append(std::string_view append) {
+ err_message += append;
+ }
+
+private:
+ std::string err_message;
+};
+
+class LogicError : public Exception {
+public:
+ template <typename... Args>
+ LogicError(const char* message, Args&&... args)
+ : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {}
+};
+
+class RuntimeError : public Exception {
+public:
+ template <typename... Args>
+ RuntimeError(const char* message, Args&&... args)
+ : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {}
+};
+
+class NotImplementedException : public Exception {
+public:
+ template <typename... Args>
+ NotImplementedException(const char* message, Args&&... args)
+ : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {
+ Append(" is not implemented");
+ }
+};
+
+class InvalidArgument : public Exception {
+public:
+ template <typename... Args>
+ InvalidArgument(const char* message, Args&&... args)
+ : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {}
+};
+
+} // namespace Shader
diff --git a/src/shader_recompiler/frontend/ir/abstract_syntax_list.h b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h
new file mode 100644
index 000000000..b61773487
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h
@@ -0,0 +1,58 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::IR {
+
+class Block;
+
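+// Node of the structured control flow tree; the active member of 'Data' depends on 'Type'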
+struct AbstractSyntaxNode {
+ enum class Type {
+ Block,
+ If,
+ EndIf,
+ Loop,
+ Repeat,
+ Break,
+ Return,
+ Unreachable,
+ };
+ union Data {
+ Block* block;
+ struct {
+ U1 cond;
+ Block* body;
+ Block* merge;
+ } if_node;
+ struct {
+ Block* merge;
+ } end_if;
+ struct {
+ Block* body;
+ Block* continue_block;
+ Block* merge;
+ } loop;
+ struct {
+ U1 cond;
+ Block* loop_header;
+ Block* merge;
+ } repeat;
+ struct {
+ U1 cond;
+ Block* merge;
+ Block* skip;
+ } break_node;
+ };
+
+ Data data{};
+ Type type{};
+};
+using AbstractSyntaxList = std::vector<AbstractSyntaxNode>;
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp
new file mode 100644
index 000000000..4d0b8b8e5
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/attribute.cpp
@@ -0,0 +1,454 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <fmt/format.h>
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/attribute.h"
+
+namespace Shader::IR {
+
+bool IsGeneric(Attribute attribute) noexcept {
+    return attribute >= Attribute::Generic0X && attribute <= Attribute::Generic31W;
+}
+
+u32 GenericAttributeIndex(Attribute attribute) {
+ if (!IsGeneric(attribute)) {
+ throw InvalidArgument("Attribute is not generic {}", attribute);
+ }
+ return (static_cast<u32>(attribute) - static_cast<u32>(Attribute::Generic0X)) / 4u;
+}
+
+u32 GenericAttributeElement(Attribute attribute) {
+ if (!IsGeneric(attribute)) {
+ throw InvalidArgument("Attribute is not generic {}", attribute);
+ }
+ return static_cast<u32>(attribute) % 4;
+}
+
+std::string NameOf(Attribute attribute) {
+ switch (attribute) {
+ case Attribute::PrimitiveId:
+ return "PrimitiveId";
+ case Attribute::Layer:
+ return "Layer";
+ case Attribute::ViewportIndex:
+ return "ViewportIndex";
+ case Attribute::PointSize:
+ return "PointSize";
+ case Attribute::PositionX:
+ return "Position.X";
+ case Attribute::PositionY:
+ return "Position.Y";
+ case Attribute::PositionZ:
+ return "Position.Z";
+ case Attribute::PositionW:
+ return "Position.W";
+ case Attribute::Generic0X:
+ return "Generic[0].X";
+ case Attribute::Generic0Y:
+ return "Generic[0].Y";
+ case Attribute::Generic0Z:
+ return "Generic[0].Z";
+ case Attribute::Generic0W:
+ return "Generic[0].W";
+ case Attribute::Generic1X:
+ return "Generic[1].X";
+ case Attribute::Generic1Y:
+ return "Generic[1].Y";
+ case Attribute::Generic1Z:
+ return "Generic[1].Z";
+ case Attribute::Generic1W:
+ return "Generic[1].W";
+ case Attribute::Generic2X:
+ return "Generic[2].X";
+ case Attribute::Generic2Y:
+ return "Generic[2].Y";
+ case Attribute::Generic2Z:
+ return "Generic[2].Z";
+ case Attribute::Generic2W:
+ return "Generic[2].W";
+ case Attribute::Generic3X:
+ return "Generic[3].X";
+ case Attribute::Generic3Y:
+ return "Generic[3].Y";
+ case Attribute::Generic3Z:
+ return "Generic[3].Z";
+ case Attribute::Generic3W:
+ return "Generic[3].W";
+ case Attribute::Generic4X:
+ return "Generic[4].X";
+ case Attribute::Generic4Y:
+ return "Generic[4].Y";
+ case Attribute::Generic4Z:
+ return "Generic[4].Z";
+ case Attribute::Generic4W:
+ return "Generic[4].W";
+ case Attribute::Generic5X:
+ return "Generic[5].X";
+ case Attribute::Generic5Y:
+ return "Generic[5].Y";
+ case Attribute::Generic5Z:
+ return "Generic[5].Z";
+ case Attribute::Generic5W:
+ return "Generic[5].W";
+ case Attribute::Generic6X:
+ return "Generic[6].X";
+ case Attribute::Generic6Y:
+ return "Generic[6].Y";
+ case Attribute::Generic6Z:
+ return "Generic[6].Z";
+ case Attribute::Generic6W:
+ return "Generic[6].W";
+ case Attribute::Generic7X:
+ return "Generic[7].X";
+ case Attribute::Generic7Y:
+ return "Generic[7].Y";
+ case Attribute::Generic7Z:
+ return "Generic[7].Z";
+ case Attribute::Generic7W:
+ return "Generic[7].W";
+ case Attribute::Generic8X:
+ return "Generic[8].X";
+ case Attribute::Generic8Y:
+ return "Generic[8].Y";
+ case Attribute::Generic8Z:
+ return "Generic[8].Z";
+ case Attribute::Generic8W:
+ return "Generic[8].W";
+ case Attribute::Generic9X:
+ return "Generic[9].X";
+ case Attribute::Generic9Y:
+ return "Generic[9].Y";
+ case Attribute::Generic9Z:
+ return "Generic[9].Z";
+ case Attribute::Generic9W:
+ return "Generic[9].W";
+ case Attribute::Generic10X:
+ return "Generic[10].X";
+ case Attribute::Generic10Y:
+ return "Generic[10].Y";
+ case Attribute::Generic10Z:
+ return "Generic[10].Z";
+ case Attribute::Generic10W:
+ return "Generic[10].W";
+ case Attribute::Generic11X:
+ return "Generic[11].X";
+ case Attribute::Generic11Y:
+ return "Generic[11].Y";
+ case Attribute::Generic11Z:
+ return "Generic[11].Z";
+ case Attribute::Generic11W:
+ return "Generic[11].W";
+ case Attribute::Generic12X:
+ return "Generic[12].X";
+ case Attribute::Generic12Y:
+ return "Generic[12].Y";
+ case Attribute::Generic12Z:
+ return "Generic[12].Z";
+ case Attribute::Generic12W:
+ return "Generic[12].W";
+ case Attribute::Generic13X:
+ return "Generic[13].X";
+ case Attribute::Generic13Y:
+ return "Generic[13].Y";
+ case Attribute::Generic13Z:
+ return "Generic[13].Z";
+ case Attribute::Generic13W:
+ return "Generic[13].W";
+ case Attribute::Generic14X:
+ return "Generic[14].X";
+ case Attribute::Generic14Y:
+ return "Generic[14].Y";
+ case Attribute::Generic14Z:
+ return "Generic[14].Z";
+ case Attribute::Generic14W:
+ return "Generic[14].W";
+ case Attribute::Generic15X:
+ return "Generic[15].X";
+ case Attribute::Generic15Y:
+ return "Generic[15].Y";
+ case Attribute::Generic15Z:
+ return "Generic[15].Z";
+ case Attribute::Generic15W:
+ return "Generic[15].W";
+ case Attribute::Generic16X:
+ return "Generic[16].X";
+ case Attribute::Generic16Y:
+ return "Generic[16].Y";
+ case Attribute::Generic16Z:
+ return "Generic[16].Z";
+ case Attribute::Generic16W:
+ return "Generic[16].W";
+ case Attribute::Generic17X:
+ return "Generic[17].X";
+ case Attribute::Generic17Y:
+ return "Generic[17].Y";
+ case Attribute::Generic17Z:
+ return "Generic[17].Z";
+ case Attribute::Generic17W:
+ return "Generic[17].W";
+ case Attribute::Generic18X:
+ return "Generic[18].X";
+ case Attribute::Generic18Y:
+ return "Generic[18].Y";
+ case Attribute::Generic18Z:
+ return "Generic[18].Z";
+ case Attribute::Generic18W:
+ return "Generic[18].W";
+ case Attribute::Generic19X:
+ return "Generic[19].X";
+ case Attribute::Generic19Y:
+ return "Generic[19].Y";
+ case Attribute::Generic19Z:
+ return "Generic[19].Z";
+ case Attribute::Generic19W:
+ return "Generic[19].W";
+ case Attribute::Generic20X:
+ return "Generic[20].X";
+ case Attribute::Generic20Y:
+ return "Generic[20].Y";
+ case Attribute::Generic20Z:
+ return "Generic[20].Z";
+ case Attribute::Generic20W:
+ return "Generic[20].W";
+ case Attribute::Generic21X:
+ return "Generic[21].X";
+ case Attribute::Generic21Y:
+ return "Generic[21].Y";
+ case Attribute::Generic21Z:
+ return "Generic[21].Z";
+ case Attribute::Generic21W:
+ return "Generic[21].W";
+ case Attribute::Generic22X:
+ return "Generic[22].X";
+ case Attribute::Generic22Y:
+ return "Generic[22].Y";
+ case Attribute::Generic22Z:
+ return "Generic[22].Z";
+ case Attribute::Generic22W:
+ return "Generic[22].W";
+ case Attribute::Generic23X:
+ return "Generic[23].X";
+ case Attribute::Generic23Y:
+ return "Generic[23].Y";
+ case Attribute::Generic23Z:
+ return "Generic[23].Z";
+ case Attribute::Generic23W:
+ return "Generic[23].W";
+ case Attribute::Generic24X:
+ return "Generic[24].X";
+ case Attribute::Generic24Y:
+ return "Generic[24].Y";
+ case Attribute::Generic24Z:
+ return "Generic[24].Z";
+ case Attribute::Generic24W:
+ return "Generic[24].W";
+ case Attribute::Generic25X:
+ return "Generic[25].X";
+ case Attribute::Generic25Y:
+ return "Generic[25].Y";
+ case Attribute::Generic25Z:
+ return "Generic[25].Z";
+ case Attribute::Generic25W:
+ return "Generic[25].W";
+ case Attribute::Generic26X:
+ return "Generic[26].X";
+ case Attribute::Generic26Y:
+ return "Generic[26].Y";
+ case Attribute::Generic26Z:
+ return "Generic[26].Z";
+ case Attribute::Generic26W:
+ return "Generic[26].W";
+ case Attribute::Generic27X:
+ return "Generic[27].X";
+ case Attribute::Generic27Y:
+ return "Generic[27].Y";
+ case Attribute::Generic27Z:
+ return "Generic[27].Z";
+ case Attribute::Generic27W:
+ return "Generic[27].W";
+ case Attribute::Generic28X:
+ return "Generic[28].X";
+ case Attribute::Generic28Y:
+ return "Generic[28].Y";
+ case Attribute::Generic28Z:
+ return "Generic[28].Z";
+ case Attribute::Generic28W:
+ return "Generic[28].W";
+ case Attribute::Generic29X:
+ return "Generic[29].X";
+ case Attribute::Generic29Y:
+ return "Generic[29].Y";
+ case Attribute::Generic29Z:
+ return "Generic[29].Z";
+ case Attribute::Generic29W:
+ return "Generic[29].W";
+ case Attribute::Generic30X:
+ return "Generic[30].X";
+ case Attribute::Generic30Y:
+ return "Generic[30].Y";
+ case Attribute::Generic30Z:
+ return "Generic[30].Z";
+ case Attribute::Generic30W:
+ return "Generic[30].W";
+ case Attribute::Generic31X:
+ return "Generic[31].X";
+ case Attribute::Generic31Y:
+ return "Generic[31].Y";
+ case Attribute::Generic31Z:
+ return "Generic[31].Z";
+ case Attribute::Generic31W:
+ return "Generic[31].W";
+ case Attribute::ColorFrontDiffuseR:
+ return "ColorFrontDiffuse.R";
+ case Attribute::ColorFrontDiffuseG:
+ return "ColorFrontDiffuse.G";
+ case Attribute::ColorFrontDiffuseB:
+ return "ColorFrontDiffuse.B";
+ case Attribute::ColorFrontDiffuseA:
+ return "ColorFrontDiffuse.A";
+ case Attribute::ColorFrontSpecularR:
+ return "ColorFrontSpecular.R";
+ case Attribute::ColorFrontSpecularG:
+ return "ColorFrontSpecular.G";
+ case Attribute::ColorFrontSpecularB:
+ return "ColorFrontSpecular.B";
+ case Attribute::ColorFrontSpecularA:
+ return "ColorFrontSpecular.A";
+ case Attribute::ColorBackDiffuseR:
+ return "ColorBackDiffuse.R";
+ case Attribute::ColorBackDiffuseG:
+ return "ColorBackDiffuse.G";
+ case Attribute::ColorBackDiffuseB:
+ return "ColorBackDiffuse.B";
+ case Attribute::ColorBackDiffuseA:
+ return "ColorBackDiffuse.A";
+ case Attribute::ColorBackSpecularR:
+ return "ColorBackSpecular.R";
+ case Attribute::ColorBackSpecularG:
+ return "ColorBackSpecular.G";
+ case Attribute::ColorBackSpecularB:
+ return "ColorBackSpecular.B";
+ case Attribute::ColorBackSpecularA:
+ return "ColorBackSpecular.A";
+ case Attribute::ClipDistance0:
+ return "ClipDistance[0]";
+ case Attribute::ClipDistance1:
+ return "ClipDistance[1]";
+ case Attribute::ClipDistance2:
+ return "ClipDistance[2]";
+ case Attribute::ClipDistance3:
+ return "ClipDistance[3]";
+ case Attribute::ClipDistance4:
+ return "ClipDistance[4]";
+ case Attribute::ClipDistance5:
+ return "ClipDistance[5]";
+ case Attribute::ClipDistance6:
+ return "ClipDistance[6]";
+ case Attribute::ClipDistance7:
+ return "ClipDistance[7]";
+ case Attribute::PointSpriteS:
+ return "PointSprite.S";
+ case Attribute::PointSpriteT:
+ return "PointSprite.T";
+ case Attribute::FogCoordinate:
+ return "FogCoordinate";
+ case Attribute::TessellationEvaluationPointU:
+ return "TessellationEvaluationPoint.U";
+ case Attribute::TessellationEvaluationPointV:
+ return "TessellationEvaluationPoint.V";
+ case Attribute::InstanceId:
+ return "InstanceId";
+ case Attribute::VertexId:
+ return "VertexId";
+ case Attribute::FixedFncTexture0S:
+ return "FixedFncTexture[0].S";
+ case Attribute::FixedFncTexture0T:
+ return "FixedFncTexture[0].T";
+ case Attribute::FixedFncTexture0R:
+ return "FixedFncTexture[0].R";
+ case Attribute::FixedFncTexture0Q:
+ return "FixedFncTexture[0].Q";
+ case Attribute::FixedFncTexture1S:
+ return "FixedFncTexture[1].S";
+ case Attribute::FixedFncTexture1T:
+ return "FixedFncTexture[1].T";
+ case Attribute::FixedFncTexture1R:
+ return "FixedFncTexture[1].R";
+ case Attribute::FixedFncTexture1Q:
+ return "FixedFncTexture[1].Q";
+ case Attribute::FixedFncTexture2S:
+ return "FixedFncTexture[2].S";
+ case Attribute::FixedFncTexture2T:
+ return "FixedFncTexture[2].T";
+ case Attribute::FixedFncTexture2R:
+ return "FixedFncTexture[2].R";
+ case Attribute::FixedFncTexture2Q:
+ return "FixedFncTexture[2].Q";
+ case Attribute::FixedFncTexture3S:
+ return "FixedFncTexture[3].S";
+ case Attribute::FixedFncTexture3T:
+ return "FixedFncTexture[3].T";
+ case Attribute::FixedFncTexture3R:
+ return "FixedFncTexture[3].R";
+ case Attribute::FixedFncTexture3Q:
+ return "FixedFncTexture[3].Q";
+ case Attribute::FixedFncTexture4S:
+ return "FixedFncTexture[4].S";
+ case Attribute::FixedFncTexture4T:
+ return "FixedFncTexture[4].T";
+ case Attribute::FixedFncTexture4R:
+ return "FixedFncTexture[4].R";
+ case Attribute::FixedFncTexture4Q:
+ return "FixedFncTexture[4].Q";
+ case Attribute::FixedFncTexture5S:
+ return "FixedFncTexture[5].S";
+ case Attribute::FixedFncTexture5T:
+ return "FixedFncTexture[5].T";
+ case Attribute::FixedFncTexture5R:
+ return "FixedFncTexture[5].R";
+ case Attribute::FixedFncTexture5Q:
+ return "FixedFncTexture[5].Q";
+ case Attribute::FixedFncTexture6S:
+ return "FixedFncTexture[6].S";
+ case Attribute::FixedFncTexture6T:
+ return "FixedFncTexture[6].T";
+ case Attribute::FixedFncTexture6R:
+ return "FixedFncTexture[6].R";
+ case Attribute::FixedFncTexture6Q:
+ return "FixedFncTexture[6].Q";
+ case Attribute::FixedFncTexture7S:
+ return "FixedFncTexture[7].S";
+ case Attribute::FixedFncTexture7T:
+ return "FixedFncTexture[7].T";
+ case Attribute::FixedFncTexture7R:
+ return "FixedFncTexture[7].R";
+ case Attribute::FixedFncTexture7Q:
+ return "FixedFncTexture[7].Q";
+ case Attribute::FixedFncTexture8S:
+ return "FixedFncTexture[8].S";
+ case Attribute::FixedFncTexture8T:
+ return "FixedFncTexture[8].T";
+ case Attribute::FixedFncTexture8R:
+ return "FixedFncTexture[8].R";
+ case Attribute::FixedFncTexture8Q:
+ return "FixedFncTexture[8].Q";
+ case Attribute::FixedFncTexture9S:
+ return "FixedFncTexture[9].S";
+ case Attribute::FixedFncTexture9T:
+ return "FixedFncTexture[9].T";
+ case Attribute::FixedFncTexture9R:
+ return "FixedFncTexture[9].R";
+ case Attribute::FixedFncTexture9Q:
+ return "FixedFncTexture[9].Q";
+ case Attribute::ViewportMask:
+ return "ViewportMask";
+ case Attribute::FrontFace:
+ return "FrontFace";
+ }
+ return fmt::format("<reserved attribute {}>", static_cast<int>(attribute));
+}
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h
new file mode 100644
index 000000000..ca1199494
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/attribute.h
@@ -0,0 +1,250 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <fmt/format.h>
+
+#include "common/common_types.h"
+
+namespace Shader::IR {
+
+enum class Attribute : u64 {
+ PrimitiveId = 24,
+ Layer = 25,
+ ViewportIndex = 26,
+ PointSize = 27,
+ PositionX = 28,
+ PositionY = 29,
+ PositionZ = 30,
+ PositionW = 31,
+ Generic0X = 32,
+ Generic0Y = 33,
+ Generic0Z = 34,
+ Generic0W = 35,
+ Generic1X = 36,
+ Generic1Y = 37,
+ Generic1Z = 38,
+ Generic1W = 39,
+ Generic2X = 40,
+ Generic2Y = 41,
+ Generic2Z = 42,
+ Generic2W = 43,
+ Generic3X = 44,
+ Generic3Y = 45,
+ Generic3Z = 46,
+ Generic3W = 47,
+ Generic4X = 48,
+ Generic4Y = 49,
+ Generic4Z = 50,
+ Generic4W = 51,
+ Generic5X = 52,
+ Generic5Y = 53,
+ Generic5Z = 54,
+ Generic5W = 55,
+ Generic6X = 56,
+ Generic6Y = 57,
+ Generic6Z = 58,
+ Generic6W = 59,
+ Generic7X = 60,
+ Generic7Y = 61,
+ Generic7Z = 62,
+ Generic7W = 63,
+ Generic8X = 64,
+ Generic8Y = 65,
+ Generic8Z = 66,
+ Generic8W = 67,
+ Generic9X = 68,
+ Generic9Y = 69,
+ Generic9Z = 70,
+ Generic9W = 71,
+ Generic10X = 72,
+ Generic10Y = 73,
+ Generic10Z = 74,
+ Generic10W = 75,
+ Generic11X = 76,
+ Generic11Y = 77,
+ Generic11Z = 78,
+ Generic11W = 79,
+ Generic12X = 80,
+ Generic12Y = 81,
+ Generic12Z = 82,
+ Generic12W = 83,
+ Generic13X = 84,
+ Generic13Y = 85,
+ Generic13Z = 86,
+ Generic13W = 87,
+ Generic14X = 88,
+ Generic14Y = 89,
+ Generic14Z = 90,
+ Generic14W = 91,
+ Generic15X = 92,
+ Generic15Y = 93,
+ Generic15Z = 94,
+ Generic15W = 95,
+ Generic16X = 96,
+ Generic16Y = 97,
+ Generic16Z = 98,
+ Generic16W = 99,
+ Generic17X = 100,
+ Generic17Y = 101,
+ Generic17Z = 102,
+ Generic17W = 103,
+ Generic18X = 104,
+ Generic18Y = 105,
+ Generic18Z = 106,
+ Generic18W = 107,
+ Generic19X = 108,
+ Generic19Y = 109,
+ Generic19Z = 110,
+ Generic19W = 111,
+ Generic20X = 112,
+ Generic20Y = 113,
+ Generic20Z = 114,
+ Generic20W = 115,
+ Generic21X = 116,
+ Generic21Y = 117,
+ Generic21Z = 118,
+ Generic21W = 119,
+ Generic22X = 120,
+ Generic22Y = 121,
+ Generic22Z = 122,
+ Generic22W = 123,
+ Generic23X = 124,
+ Generic23Y = 125,
+ Generic23Z = 126,
+ Generic23W = 127,
+ Generic24X = 128,
+ Generic24Y = 129,
+ Generic24Z = 130,
+ Generic24W = 131,
+ Generic25X = 132,
+ Generic25Y = 133,
+ Generic25Z = 134,
+ Generic25W = 135,
+ Generic26X = 136,
+ Generic26Y = 137,
+ Generic26Z = 138,
+ Generic26W = 139,
+ Generic27X = 140,
+ Generic27Y = 141,
+ Generic27Z = 142,
+ Generic27W = 143,
+ Generic28X = 144,
+ Generic28Y = 145,
+ Generic28Z = 146,
+ Generic28W = 147,
+ Generic29X = 148,
+ Generic29Y = 149,
+ Generic29Z = 150,
+ Generic29W = 151,
+ Generic30X = 152,
+ Generic30Y = 153,
+ Generic30Z = 154,
+ Generic30W = 155,
+ Generic31X = 156,
+ Generic31Y = 157,
+ Generic31Z = 158,
+ Generic31W = 159,
+ ColorFrontDiffuseR = 160,
+ ColorFrontDiffuseG = 161,
+ ColorFrontDiffuseB = 162,
+ ColorFrontDiffuseA = 163,
+ ColorFrontSpecularR = 164,
+ ColorFrontSpecularG = 165,
+ ColorFrontSpecularB = 166,
+ ColorFrontSpecularA = 167,
+ ColorBackDiffuseR = 168,
+ ColorBackDiffuseG = 169,
+ ColorBackDiffuseB = 170,
+ ColorBackDiffuseA = 171,
+ ColorBackSpecularR = 172,
+ ColorBackSpecularG = 173,
+ ColorBackSpecularB = 174,
+ ColorBackSpecularA = 175,
+ ClipDistance0 = 176,
+ ClipDistance1 = 177,
+ ClipDistance2 = 178,
+ ClipDistance3 = 179,
+ ClipDistance4 = 180,
+ ClipDistance5 = 181,
+ ClipDistance6 = 182,
+ ClipDistance7 = 183,
+ PointSpriteS = 184,
+ PointSpriteT = 185,
+ FogCoordinate = 186,
+ TessellationEvaluationPointU = 188,
+ TessellationEvaluationPointV = 189,
+ InstanceId = 190,
+ VertexId = 191,
+ FixedFncTexture0S = 192,
+ FixedFncTexture0T = 193,
+ FixedFncTexture0R = 194,
+ FixedFncTexture0Q = 195,
+ FixedFncTexture1S = 196,
+ FixedFncTexture1T = 197,
+ FixedFncTexture1R = 198,
+ FixedFncTexture1Q = 199,
+ FixedFncTexture2S = 200,
+ FixedFncTexture2T = 201,
+ FixedFncTexture2R = 202,
+ FixedFncTexture2Q = 203,
+ FixedFncTexture3S = 204,
+ FixedFncTexture3T = 205,
+ FixedFncTexture3R = 206,
+ FixedFncTexture3Q = 207,
+ FixedFncTexture4S = 208,
+ FixedFncTexture4T = 209,
+ FixedFncTexture4R = 210,
+ FixedFncTexture4Q = 211,
+ FixedFncTexture5S = 212,
+ FixedFncTexture5T = 213,
+ FixedFncTexture5R = 214,
+ FixedFncTexture5Q = 215,
+ FixedFncTexture6S = 216,
+ FixedFncTexture6T = 217,
+ FixedFncTexture6R = 218,
+ FixedFncTexture6Q = 219,
+ FixedFncTexture7S = 220,
+ FixedFncTexture7T = 221,
+ FixedFncTexture7R = 222,
+ FixedFncTexture7Q = 223,
+ FixedFncTexture8S = 224,
+ FixedFncTexture8T = 225,
+ FixedFncTexture8R = 226,
+ FixedFncTexture8Q = 227,
+ FixedFncTexture9S = 228,
+ FixedFncTexture9T = 229,
+ FixedFncTexture9R = 230,
+ FixedFncTexture9Q = 231,
+ ViewportMask = 232,
+ FrontFace = 255,
+};
+
+constexpr size_t NUM_GENERICS = 32;
+
+[[nodiscard]] bool IsGeneric(Attribute attribute) noexcept;
+
+[[nodiscard]] u32 GenericAttributeIndex(Attribute attribute);
+
+[[nodiscard]] u32 GenericAttributeElement(Attribute attribute);
+
+[[nodiscard]] std::string NameOf(Attribute attribute);
+
+[[nodiscard]] constexpr IR::Attribute operator+(IR::Attribute attribute, size_t value) noexcept {
+ return static_cast<IR::Attribute>(static_cast<size_t>(attribute) + value);
+}
+
+} // namespace Shader::IR
+
+template <>
+struct fmt::formatter<Shader::IR::Attribute> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::IR::Attribute& attribute, FormatContext& ctx) {
+ return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(attribute));
+ }
+};
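Note on the header above: a minimal, illustrative sketch (not part of the commit) of how the constexpr operator+ and the fmt formatter compose. The helper name is hypothetical and the four-elements-per-generic layout is inferred from the enum values.

// Hypothetical helper: address element `swizzle` of generic attribute `index`,
// assuming the Generic0X..Generic31W values are laid out four per attribute.
constexpr Shader::IR::Attribute GenericElement(u32 index, u32 swizzle) {
    return Shader::IR::Attribute::Generic0X + (static_cast<size_t>(index) * 4 + swizzle);
}
// fmt::format("{}", GenericElement(3, 1)) prints whatever NameOf returns for that
// attribute, through the formatter specialization declared above.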
diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp
new file mode 100644
index 000000000..7c08b25ce
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/basic_block.cpp
@@ -0,0 +1,149 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <initializer_list>
+#include <map>
+#include <memory>
+
+#include "common/bit_cast.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::IR {
+
+Block::Block(ObjectPool<Inst>& inst_pool_) : inst_pool{&inst_pool_} {}
+
+Block::~Block() = default;
+
+void Block::AppendNewInst(Opcode op, std::initializer_list<Value> args) {
+ PrependNewInst(end(), op, args);
+}
+
+Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op,
+ std::initializer_list<Value> args, u32 flags) {
+ Inst* const inst{inst_pool->Create(op, flags)};
+ const auto result_it{instructions.insert(insertion_point, *inst)};
+
+ if (inst->NumArgs() != args.size()) {
+ throw InvalidArgument("Invalid number of arguments {} in {}", args.size(), op);
+ }
+ std::ranges::for_each(args, [inst, index = size_t{0}](const Value& arg) mutable {
+ inst->SetArg(index, arg);
+ ++index;
+ });
+ return result_it;
+}
+
+void Block::AddBranch(Block* block) {
+ if (std::ranges::find(imm_successors, block) != imm_successors.end()) {
+ throw LogicError("Successor already inserted");
+ }
+ if (std::ranges::find(block->imm_predecessors, this) != block->imm_predecessors.end()) {
+ throw LogicError("Predecessor already inserted");
+ }
+ imm_successors.push_back(block);
+ block->imm_predecessors.push_back(this);
+}
+
+static std::string BlockToIndex(const std::map<const Block*, size_t>& block_to_index,
+ Block* block) {
+ if (const auto it{block_to_index.find(block)}; it != block_to_index.end()) {
+ return fmt::format("{{Block ${}}}", it->second);
+ }
+ return fmt::format("$<unknown block {:016x}>", reinterpret_cast<u64>(block));
+}
+
+static size_t InstIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index,
+ const Inst* inst) {
+ const auto [it, is_inserted]{inst_to_index.emplace(inst, inst_index + 1)};
+ if (is_inserted) {
+ ++inst_index;
+ }
+ return it->second;
+}
+
+static std::string ArgToIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index,
+ const Value& arg) {
+ if (arg.IsEmpty()) {
+ return "<null>";
+ }
+ if (!arg.IsImmediate() || arg.IsIdentity()) {
+ return fmt::format("%{}", InstIndex(inst_to_index, inst_index, arg.Inst()));
+ }
+ switch (arg.Type()) {
+ case Type::U1:
+ return fmt::format("#{}", arg.U1() ? "true" : "false");
+ case Type::U8:
+ return fmt::format("#{}", arg.U8());
+ case Type::U16:
+ return fmt::format("#{}", arg.U16());
+ case Type::U32:
+ return fmt::format("#{}", arg.U32());
+ case Type::U64:
+ return fmt::format("#{}", arg.U64());
+ case Type::F32:
+ return fmt::format("#{}", arg.F32());
+ case Type::Reg:
+ return fmt::format("{}", arg.Reg());
+ case Type::Pred:
+ return fmt::format("{}", arg.Pred());
+ case Type::Attribute:
+ return fmt::format("{}", arg.Attribute());
+ default:
+ return "<unknown immediate type>";
+ }
+}
+
+std::string DumpBlock(const Block& block) {
+ size_t inst_index{0};
+ std::map<const Inst*, size_t> inst_to_index;
+ return DumpBlock(block, {}, inst_to_index, inst_index);
+}
+
+std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>& block_to_index,
+ std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index) {
+ std::string ret{"Block"};
+ if (const auto it{block_to_index.find(&block)}; it != block_to_index.end()) {
+ ret += fmt::format(" ${}", it->second);
+ }
+ ret += '\n';
+ for (const Inst& inst : block) {
+ const Opcode op{inst.GetOpcode()};
+ ret += fmt::format("[{:016x}] ", reinterpret_cast<u64>(&inst));
+ if (TypeOf(op) != Type::Void) {
+ ret += fmt::format("%{:<5} = {}", InstIndex(inst_to_index, inst_index, &inst), op);
+ } else {
+ ret += fmt::format(" {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces
+ }
+ const size_t arg_count{inst.NumArgs()};
+ for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) {
+ const Value arg{inst.Arg(arg_index)};
+ const std::string arg_str{ArgToIndex(inst_to_index, inst_index, arg)};
+ ret += arg_index != 0 ? ", " : " ";
+ if (op == Opcode::Phi) {
+ ret += fmt::format("[ {}, {} ]", arg_str,
+ BlockToIndex(block_to_index, inst.PhiBlock(arg_index)));
+ } else {
+ ret += arg_str;
+ }
+ if (op != Opcode::Phi) {
+ const Type actual_type{arg.Type()};
+ const Type expected_type{ArgTypeOf(op, arg_index)};
+ if (!AreTypesCompatible(actual_type, expected_type)) {
+ ret += fmt::format("<type error: {} != {}>", actual_type, expected_type);
+ }
+ }
+ }
+ if (TypeOf(op) != Type::Void) {
+ ret += fmt::format(" (uses: {})\n", inst.UseCount());
+ } else {
+ ret += '\n';
+ }
+ }
+ return ret;
+}
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h
new file mode 100644
index 000000000..7e134b4c7
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/basic_block.h
@@ -0,0 +1,186 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <initializer_list>
+#include <map>
+#include <span>
+#include <vector>
+
+#include <boost/intrusive/list.hpp>
+
+#include "common/bit_cast.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/condition.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/object_pool.h"
+
+namespace Shader::IR {
+
+class Block {
+public:
+ using InstructionList = boost::intrusive::list<Inst>;
+ using size_type = InstructionList::size_type;
+ using iterator = InstructionList::iterator;
+ using const_iterator = InstructionList::const_iterator;
+ using reverse_iterator = InstructionList::reverse_iterator;
+ using const_reverse_iterator = InstructionList::const_reverse_iterator;
+
+ explicit Block(ObjectPool<Inst>& inst_pool_);
+ ~Block();
+
+ Block(const Block&) = delete;
+ Block& operator=(const Block&) = delete;
+
+ Block(Block&&) = default;
+ Block& operator=(Block&&) = default;
+
+ /// Appends a new instruction to the end of this basic block.
+ void AppendNewInst(Opcode op, std::initializer_list<Value> args);
+
+ /// Prepends a new instruction to this basic block before the insertion point.
+ iterator PrependNewInst(iterator insertion_point, Opcode op,
+ std::initializer_list<Value> args = {}, u32 flags = 0);
+
+ /// Adds a new branch to this basic block.
+ void AddBranch(Block* block);
+
+ /// Gets a mutable reference to the instruction list for this basic block.
+ [[nodiscard]] InstructionList& Instructions() noexcept {
+ return instructions;
+ }
+ /// Gets an immutable reference to the instruction list for this basic block.
+ [[nodiscard]] const InstructionList& Instructions() const noexcept {
+ return instructions;
+ }
+
+ /// Gets an immutable span to the immediate predecessors.
+ [[nodiscard]] std::span<Block* const> ImmPredecessors() const noexcept {
+ return imm_predecessors;
+ }
+ /// Gets an immutable span to the immediate successors.
+ [[nodiscard]] std::span<Block* const> ImmSuccessors() const noexcept {
+ return imm_successors;
+ }
+
+ /// Intrusively store the host definition of this block.
+ template <typename DefinitionType>
+ void SetDefinition(DefinitionType def) {
+ definition = Common::BitCast<u32>(def);
+ }
+
+ /// Return the intrusively stored host definition of this block.
+ template <typename DefinitionType>
+ [[nodiscard]] DefinitionType Definition() const noexcept {
+ return Common::BitCast<DefinitionType>(definition);
+ }
+
+ void SetSsaRegValue(IR::Reg reg, const Value& value) noexcept {
+ ssa_reg_values[RegIndex(reg)] = value;
+ }
+ const Value& SsaRegValue(IR::Reg reg) const noexcept {
+ return ssa_reg_values[RegIndex(reg)];
+ }
+
+ void SsaSeal() noexcept {
+ is_ssa_sealed = true;
+ }
+ [[nodiscard]] bool IsSsaSealed() const noexcept {
+ return is_ssa_sealed;
+ }
+
+ [[nodiscard]] bool empty() const {
+ return instructions.empty();
+ }
+ [[nodiscard]] size_type size() const {
+ return instructions.size();
+ }
+
+ [[nodiscard]] Inst& front() {
+ return instructions.front();
+ }
+ [[nodiscard]] const Inst& front() const {
+ return instructions.front();
+ }
+
+ [[nodiscard]] Inst& back() {
+ return instructions.back();
+ }
+ [[nodiscard]] const Inst& back() const {
+ return instructions.back();
+ }
+
+ [[nodiscard]] iterator begin() {
+ return instructions.begin();
+ }
+ [[nodiscard]] const_iterator begin() const {
+ return instructions.begin();
+ }
+ [[nodiscard]] iterator end() {
+ return instructions.end();
+ }
+ [[nodiscard]] const_iterator end() const {
+ return instructions.end();
+ }
+
+ [[nodiscard]] reverse_iterator rbegin() {
+ return instructions.rbegin();
+ }
+ [[nodiscard]] const_reverse_iterator rbegin() const {
+ return instructions.rbegin();
+ }
+ [[nodiscard]] reverse_iterator rend() {
+ return instructions.rend();
+ }
+ [[nodiscard]] const_reverse_iterator rend() const {
+ return instructions.rend();
+ }
+
+ [[nodiscard]] const_iterator cbegin() const {
+ return instructions.cbegin();
+ }
+ [[nodiscard]] const_iterator cend() const {
+ return instructions.cend();
+ }
+
+ [[nodiscard]] const_reverse_iterator crbegin() const {
+ return instructions.crbegin();
+ }
+ [[nodiscard]] const_reverse_iterator crend() const {
+ return instructions.crend();
+ }
+
+private:
+ /// Memory pool for instruction list
+ ObjectPool<Inst>* inst_pool;
+
+ /// List of instructions in this block
+ InstructionList instructions;
+
+ /// Block immediate predecessors
+ std::vector<Block*> imm_predecessors;
+ /// Block immediate successors
+ std::vector<Block*> imm_successors;
+
+ /// Intrusively store the value of a register in the block.
+ std::array<Value, NUM_REGS> ssa_reg_values;
+ /// Intrusively store if the block is sealed in the SSA pass.
+ bool is_ssa_sealed{false};
+
+ /// Intrusively stored host definition of this block.
+ u32 definition{};
+};
+
+using BlockList = std::vector<Block*>;
+
+[[nodiscard]] std::string DumpBlock(const Block& block);
+
+[[nodiscard]] std::string DumpBlock(const Block& block,
+ const std::map<const Block*, size_t>& block_to_index,
+ std::map<const Inst*, size_t>& inst_to_index,
+ size_t& inst_index);
+
+} // namespace Shader::IR
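For orientation, a minimal sketch of the Block API declared above; `inst_pool` (an ObjectPool<Inst>) and `merge_block` are assumed to exist in surrounding code.

// Create a block, append an instruction, record a control-flow edge, and dump it.
Shader::IR::Block block{inst_pool};
block.AppendNewInst(Shader::IR::Opcode::Prologue, {});
block.AddBranch(&merge_block); // links imm_successors/imm_predecessors on both blocks
fmt::print("{}", Shader::IR::DumpBlock(block));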
diff --git a/src/shader_recompiler/frontend/ir/breadth_first_search.h b/src/shader_recompiler/frontend/ir/breadth_first_search.h
new file mode 100644
index 000000000..a52ccbd58
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/breadth_first_search.h
@@ -0,0 +1,57 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <optional>
+#include <queue>
+#include <type_traits>
+
+#include <boost/container/small_vector.hpp>
+
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::IR {
+
+template <typename Pred>
+auto BreadthFirstSearch(const Value& value, Pred&& pred)
+ -> std::invoke_result_t<Pred, const Inst*> {
+ if (value.IsImmediate()) {
+ // Nothing to do with immediates
+ return std::nullopt;
+ }
+ // Breadth-first search visiting the rightmost arguments first
+ // Small vector capacity determined empirically from shaders in Super Smash Bros. Ultimate
+ boost::container::small_vector<const Inst*, 2> visited;
+ std::queue<const Inst*> queue;
+ queue.push(value.InstRecursive());
+
+ while (!queue.empty()) {
+ // Pop one instruction from the queue
+ const Inst* const inst{queue.front()};
+ queue.pop();
+ if (const std::optional result = pred(inst)) {
+ // This is the instruction we were looking for
+ return result;
+ }
+ // Visit the rightmost arguments first
+ for (size_t arg = inst->NumArgs(); arg--;) {
+ const Value arg_value{inst->Arg(arg)};
+ if (arg_value.IsImmediate()) {
+ continue;
+ }
+ // Queue instruction if it hasn't been visited
+ const Inst* const arg_inst{arg_value.InstRecursive()};
+ if (std::ranges::find(visited, arg_inst) == visited.end()) {
+ visited.push_back(arg_inst);
+ queue.push(arg_inst);
+ }
+ }
+ }
+ // SSA tree has been traversed and the result hasn't been found
+ return std::nullopt;
+}
+
+} // namespace Shader::IR
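A hypothetical usage sketch of the search above: the predicate returns a std::optional, and the first instruction it accepts terminates the traversal. `value` is an IR::Value assumed from surrounding code.

// Find the attribute read (if any) that transitively produces `value`.
const auto pred{[](const Shader::IR::Inst* inst) -> std::optional<Shader::IR::Attribute> {
    if (inst->GetOpcode() == Shader::IR::Opcode::GetAttribute) {
        return inst->Arg(0).Attribute();
    }
    return std::nullopt;
}};
if (const std::optional attr{Shader::IR::BreadthFirstSearch(value, pred)}) {
    fmt::print("fed by {}\n", *attr);
}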
diff --git a/src/shader_recompiler/frontend/ir/condition.cpp b/src/shader_recompiler/frontend/ir/condition.cpp
new file mode 100644
index 000000000..fc18ea2a2
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/condition.cpp
@@ -0,0 +1,29 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string>
+
+#include <fmt/format.h>
+
+#include "shader_recompiler/frontend/ir/condition.h"
+
+namespace Shader::IR {
+
+std::string NameOf(Condition condition) {
+ std::string ret;
+ if (condition.GetFlowTest() != FlowTest::T) {
+ ret = fmt::to_string(condition.GetFlowTest());
+ }
+ const auto [pred, negated]{condition.GetPred()};
+ if (!ret.empty()) {
+ ret += '&';
+ }
+ if (negated) {
+ ret += '!';
+ }
+ ret += fmt::to_string(pred);
+ return ret;
+}
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/condition.h b/src/shader_recompiler/frontend/ir/condition.h
new file mode 100644
index 000000000..aa8597c60
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/condition.h
@@ -0,0 +1,60 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <compare>
+#include <string>
+
+#include <fmt/format.h>
+
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/flow_test.h"
+#include "shader_recompiler/frontend/ir/pred.h"
+
+namespace Shader::IR {
+
+class Condition {
+public:
+ Condition() noexcept = default;
+
+ explicit Condition(FlowTest flow_test_, Pred pred_, bool pred_negated_ = false) noexcept
+ : flow_test{static_cast<u16>(flow_test_)}, pred{static_cast<u8>(pred_)},
+ pred_negated{pred_negated_ ? u8{1} : u8{0}} {}
+
+ explicit Condition(Pred pred_, bool pred_negated_ = false) noexcept
+ : Condition(FlowTest::T, pred_, pred_negated_) {}
+
+ explicit Condition(bool value) : Condition(Pred::PT, !value) {}
+
+ auto operator<=>(const Condition&) const noexcept = default;
+
+ [[nodiscard]] IR::FlowTest GetFlowTest() const noexcept {
+ return static_cast<IR::FlowTest>(flow_test);
+ }
+
+ [[nodiscard]] std::pair<IR::Pred, bool> GetPred() const noexcept {
+ return {static_cast<IR::Pred>(pred), pred_negated != 0};
+ }
+
+private:
+ u16 flow_test;
+ u8 pred;
+ u8 pred_negated;
+};
+
+std::string NameOf(Condition condition);
+
+} // namespace Shader::IR
+
+template <>
+struct fmt::formatter<Shader::IR::Condition> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::IR::Condition& cond, FormatContext& ctx) {
+ return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(cond));
+ }
+};
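A small illustrative example of how Condition packs a flow test with a possibly negated predicate and how NameOf renders it; Pred::P0 is assumed from pred.h.

// Roughly "NE condition-code test AND NOT P0".
const Shader::IR::Condition cond{Shader::IR::FlowTest::NE, Shader::IR::Pred::P0, true};
fmt::print("{}\n", cond); // NameOf concatenates: flow test, '&', '!' for negation, predicate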
diff --git a/src/shader_recompiler/frontend/ir/flow_test.cpp b/src/shader_recompiler/frontend/ir/flow_test.cpp
new file mode 100644
index 000000000..6ebb4ad89
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/flow_test.cpp
@@ -0,0 +1,83 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string>
+
+#include <fmt/format.h>
+
+#include "shader_recompiler/frontend/ir/flow_test.h"
+
+namespace Shader::IR {
+
+std::string NameOf(FlowTest flow_test) {
+ switch (flow_test) {
+ case FlowTest::F:
+ return "F";
+ case FlowTest::LT:
+ return "LT";
+ case FlowTest::EQ:
+ return "EQ";
+ case FlowTest::LE:
+ return "LE";
+ case FlowTest::GT:
+ return "GT";
+ case FlowTest::NE:
+ return "NE";
+ case FlowTest::GE:
+ return "GE";
+ case FlowTest::NUM:
+ return "NUM";
+ case FlowTest::NaN:
+ return "NAN";
+ case FlowTest::LTU:
+ return "LTU";
+ case FlowTest::EQU:
+ return "EQU";
+ case FlowTest::LEU:
+ return "LEU";
+ case FlowTest::GTU:
+ return "GTU";
+ case FlowTest::NEU:
+ return "NEU";
+ case FlowTest::GEU:
+ return "GEU";
+ case FlowTest::T:
+ return "T";
+ case FlowTest::OFF:
+ return "OFF";
+ case FlowTest::LO:
+ return "LO";
+ case FlowTest::SFF:
+ return "SFF";
+ case FlowTest::LS:
+ return "LS";
+ case FlowTest::HI:
+ return "HI";
+ case FlowTest::SFT:
+ return "SFT";
+ case FlowTest::HS:
+ return "HS";
+ case FlowTest::OFT:
+ return "OFT";
+ case FlowTest::CSM_TA:
+ return "CSM_TA";
+ case FlowTest::CSM_TR:
+ return "CSM_TR";
+ case FlowTest::CSM_MX:
+ return "CSM_MX";
+ case FlowTest::FCSM_TA:
+ return "FCSM_TA";
+ case FlowTest::FCSM_TR:
+ return "FCSM_TR";
+ case FlowTest::FCSM_MX:
+ return "FCSM_MX";
+ case FlowTest::RLE:
+ return "RLE";
+ case FlowTest::RGT:
+ return "RGT";
+ }
+ return fmt::format("<invalid flow test {}>", static_cast<int>(flow_test));
+}
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/flow_test.h b/src/shader_recompiler/frontend/ir/flow_test.h
new file mode 100644
index 000000000..09e113773
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/flow_test.h
@@ -0,0 +1,62 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+#include <fmt/format.h>
+
+#include "common/common_types.h"
+
+namespace Shader::IR {
+
+enum class FlowTest : u64 {
+ F,
+ LT,
+ EQ,
+ LE,
+ GT,
+ NE,
+ GE,
+ NUM,
+ NaN,
+ LTU,
+ EQU,
+ LEU,
+ GTU,
+ NEU,
+ GEU,
+ T,
+ OFF,
+ LO,
+ SFF,
+ LS,
+ HI,
+ SFT,
+ HS,
+ OFT,
+ CSM_TA,
+ CSM_TR,
+ CSM_MX,
+ FCSM_TA,
+ FCSM_TR,
+ FCSM_MX,
+ RLE,
+ RGT,
+};
+
+[[nodiscard]] std::string NameOf(FlowTest flow_test);
+
+} // namespace Shader::IR
+
+template <>
+struct fmt::formatter<Shader::IR::FlowTest> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::IR::FlowTest& flow_test, FormatContext& ctx) {
+ return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(flow_test));
+ }
+};
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
new file mode 100644
index 000000000..13159a68d
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -0,0 +1,2017 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_cast.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::IR {
+namespace {
+[[noreturn]] void ThrowInvalidType(Type type) {
+ throw InvalidArgument("Invalid type {}", type);
+}
+
+Value MakeLodClampPair(IREmitter& ir, const F32& bias_lod, const F32& lod_clamp) {
+ if (!bias_lod.IsEmpty() && !lod_clamp.IsEmpty()) {
+ return ir.CompositeConstruct(bias_lod, lod_clamp);
+ } else if (!bias_lod.IsEmpty()) {
+ return bias_lod;
+ } else if (!lod_clamp.IsEmpty()) {
+ return lod_clamp;
+ } else {
+ return Value{};
+ }
+}
+} // Anonymous namespace
+
+U1 IREmitter::Imm1(bool value) const {
+ return U1{Value{value}};
+}
+
+U8 IREmitter::Imm8(u8 value) const {
+ return U8{Value{value}};
+}
+
+U16 IREmitter::Imm16(u16 value) const {
+ return U16{Value{value}};
+}
+
+U32 IREmitter::Imm32(u32 value) const {
+ return U32{Value{value}};
+}
+
+U32 IREmitter::Imm32(s32 value) const {
+ return U32{Value{static_cast<u32>(value)}};
+}
+
+F32 IREmitter::Imm32(f32 value) const {
+ return F32{Value{value}};
+}
+
+U64 IREmitter::Imm64(u64 value) const {
+ return U64{Value{value}};
+}
+
+U64 IREmitter::Imm64(s64 value) const {
+ return U64{Value{static_cast<u64>(value)}};
+}
+
+F64 IREmitter::Imm64(f64 value) const {
+ return F64{Value{value}};
+}
+
+U1 IREmitter::ConditionRef(const U1& value) {
+ return Inst<U1>(Opcode::ConditionRef, value);
+}
+
+void IREmitter::Reference(const Value& value) {
+ Inst(Opcode::Reference, value);
+}
+
+void IREmitter::PhiMove(IR::Inst& phi, const Value& value) {
+ Inst(Opcode::PhiMove, Value{&phi}, value);
+}
+
+void IREmitter::Prologue() {
+ Inst(Opcode::Prologue);
+}
+
+void IREmitter::Epilogue() {
+ Inst(Opcode::Epilogue);
+}
+
+void IREmitter::DemoteToHelperInvocation() {
+ Inst(Opcode::DemoteToHelperInvocation);
+}
+
+void IREmitter::EmitVertex(const U32& stream) {
+ Inst(Opcode::EmitVertex, stream);
+}
+
+void IREmitter::EndPrimitive(const U32& stream) {
+ Inst(Opcode::EndPrimitive, stream);
+}
+
+void IREmitter::Barrier() {
+ Inst(Opcode::Barrier);
+}
+
+void IREmitter::WorkgroupMemoryBarrier() {
+ Inst(Opcode::WorkgroupMemoryBarrier);
+}
+
+void IREmitter::DeviceMemoryBarrier() {
+ Inst(Opcode::DeviceMemoryBarrier);
+}
+
+U32 IREmitter::GetReg(IR::Reg reg) {
+ return Inst<U32>(Opcode::GetRegister, reg);
+}
+
+void IREmitter::SetReg(IR::Reg reg, const U32& value) {
+ Inst(Opcode::SetRegister, reg, value);
+}
+
+U1 IREmitter::GetPred(IR::Pred pred, bool is_negated) {
+ if (pred == Pred::PT) {
+ return Imm1(!is_negated);
+ }
+ const U1 value{Inst<U1>(Opcode::GetPred, pred)};
+ if (is_negated) {
+ return Inst<U1>(Opcode::LogicalNot, value);
+ } else {
+ return value;
+ }
+}
+
+void IREmitter::SetPred(IR::Pred pred, const U1& value) {
+ if (pred != IR::Pred::PT) {
+ Inst(Opcode::SetPred, pred, value);
+ }
+}
+
+U1 IREmitter::GetGotoVariable(u32 id) {
+ return Inst<U1>(Opcode::GetGotoVariable, id);
+}
+
+void IREmitter::SetGotoVariable(u32 id, const U1& value) {
+ Inst(Opcode::SetGotoVariable, id, value);
+}
+
+U32 IREmitter::GetIndirectBranchVariable() {
+ return Inst<U32>(Opcode::GetIndirectBranchVariable);
+}
+
+void IREmitter::SetIndirectBranchVariable(const U32& value) {
+ Inst(Opcode::SetIndirectBranchVariable, value);
+}
+
+U32 IREmitter::GetCbuf(const U32& binding, const U32& byte_offset) {
+ return Inst<U32>(Opcode::GetCbufU32, binding, byte_offset);
+}
+
+Value IREmitter::GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize,
+ bool is_signed) {
+ switch (bitsize) {
+ case 8:
+ return Inst<U32>(is_signed ? Opcode::GetCbufS8 : Opcode::GetCbufU8, binding, byte_offset);
+ case 16:
+ return Inst<U32>(is_signed ? Opcode::GetCbufS16 : Opcode::GetCbufU16, binding, byte_offset);
+ case 32:
+ return Inst<U32>(Opcode::GetCbufU32, binding, byte_offset);
+ case 64:
+ return Inst(Opcode::GetCbufU32x2, binding, byte_offset);
+ default:
+ throw InvalidArgument("Invalid bit size {}", bitsize);
+ }
+}
+
+F32 IREmitter::GetFloatCbuf(const U32& binding, const U32& byte_offset) {
+ return Inst<F32>(Opcode::GetCbufF32, binding, byte_offset);
+}
+
+U1 IREmitter::GetZFlag() {
+ return Inst<U1>(Opcode::GetZFlag);
+}
+
+U1 IREmitter::GetSFlag() {
+ return Inst<U1>(Opcode::GetSFlag);
+}
+
+U1 IREmitter::GetCFlag() {
+ return Inst<U1>(Opcode::GetCFlag);
+}
+
+U1 IREmitter::GetOFlag() {
+ return Inst<U1>(Opcode::GetOFlag);
+}
+
+void IREmitter::SetZFlag(const U1& value) {
+ Inst(Opcode::SetZFlag, value);
+}
+
+void IREmitter::SetSFlag(const U1& value) {
+ Inst(Opcode::SetSFlag, value);
+}
+
+void IREmitter::SetCFlag(const U1& value) {
+ Inst(Opcode::SetCFlag, value);
+}
+
+void IREmitter::SetOFlag(const U1& value) {
+ Inst(Opcode::SetOFlag, value);
+}
+
+static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) {
+ switch (flow_test) {
+ case FlowTest::F:
+ return ir.Imm1(false);
+ case FlowTest::LT:
+ return ir.LogicalXor(ir.LogicalAnd(ir.GetSFlag(), ir.LogicalNot(ir.GetZFlag())),
+ ir.GetOFlag());
+ case FlowTest::EQ:
+ return ir.LogicalAnd(ir.LogicalNot(ir.GetSFlag()), ir.GetZFlag());
+ case FlowTest::LE:
+ return ir.LogicalXor(ir.GetSFlag(), ir.LogicalOr(ir.GetZFlag(), ir.GetOFlag()));
+ case FlowTest::GT:
+ return ir.LogicalAnd(ir.LogicalXor(ir.LogicalNot(ir.GetSFlag()), ir.GetOFlag()),
+ ir.LogicalNot(ir.GetZFlag()));
+ case FlowTest::NE:
+ return ir.LogicalNot(ir.GetZFlag());
+ case FlowTest::GE:
+ return ir.LogicalNot(ir.LogicalXor(ir.GetSFlag(), ir.GetOFlag()));
+ case FlowTest::NUM:
+ return ir.LogicalOr(ir.LogicalNot(ir.GetSFlag()), ir.LogicalNot(ir.GetZFlag()));
+ case FlowTest::NaN:
+ return ir.LogicalAnd(ir.GetSFlag(), ir.GetZFlag());
+ case FlowTest::LTU:
+ return ir.LogicalXor(ir.GetSFlag(), ir.GetOFlag());
+ case FlowTest::EQU:
+ return ir.GetZFlag();
+ case FlowTest::LEU:
+ return ir.LogicalOr(ir.LogicalXor(ir.GetSFlag(), ir.GetOFlag()), ir.GetZFlag());
+ case FlowTest::GTU:
+ return ir.LogicalXor(ir.LogicalNot(ir.GetSFlag()),
+ ir.LogicalOr(ir.GetZFlag(), ir.GetOFlag()));
+ case FlowTest::NEU:
+ return ir.LogicalOr(ir.GetSFlag(), ir.LogicalNot(ir.GetZFlag()));
+ case FlowTest::GEU:
+ return ir.LogicalXor(ir.LogicalOr(ir.LogicalNot(ir.GetSFlag()), ir.GetZFlag()),
+ ir.GetOFlag());
+ case FlowTest::T:
+ return ir.Imm1(true);
+ case FlowTest::OFF:
+ return ir.LogicalNot(ir.GetOFlag());
+ case FlowTest::LO:
+ return ir.LogicalNot(ir.GetCFlag());
+ case FlowTest::SFF:
+ return ir.LogicalNot(ir.GetSFlag());
+ case FlowTest::LS:
+ return ir.LogicalOr(ir.GetZFlag(), ir.LogicalNot(ir.GetCFlag()));
+ case FlowTest::HI:
+ return ir.LogicalAnd(ir.GetCFlag(), ir.LogicalNot(ir.GetZFlag()));
+ case FlowTest::SFT:
+ return ir.GetSFlag();
+ case FlowTest::HS:
+ return ir.GetCFlag();
+ case FlowTest::OFT:
+ return ir.GetOFlag();
+ case FlowTest::RLE:
+ return ir.LogicalOr(ir.GetSFlag(), ir.GetZFlag());
+ case FlowTest::RGT:
+ return ir.LogicalAnd(ir.LogicalNot(ir.GetSFlag()), ir.LogicalNot(ir.GetZFlag()));
+ case FlowTest::FCSM_TR:
+ LOG_WARNING(Shader, "(STUBBED) FCSM_TR");
+ return ir.Imm1(false);
+ case FlowTest::CSM_TA:
+ case FlowTest::CSM_TR:
+ case FlowTest::CSM_MX:
+ case FlowTest::FCSM_TA:
+ case FlowTest::FCSM_MX:
+ default:
+ throw NotImplementedException("Flow test {}", flow_test);
+ }
+}
+
+U1 IREmitter::Condition(IR::Condition cond) {
+ const FlowTest flow_test{cond.GetFlowTest()};
+ const auto [pred, is_negated]{cond.GetPred()};
+ if (flow_test == FlowTest::T) {
+ return GetPred(pred, is_negated);
+ }
+ return LogicalAnd(GetPred(pred, is_negated), GetFlowTest(*this, flow_test));
+}
+
+U1 IREmitter::GetFlowTestResult(FlowTest test) {
+ return GetFlowTest(*this, test);
+}
+
+F32 IREmitter::GetAttribute(IR::Attribute attribute) {
+ return GetAttribute(attribute, Imm32(0));
+}
+
+F32 IREmitter::GetAttribute(IR::Attribute attribute, const U32& vertex) {
+ return Inst<F32>(Opcode::GetAttribute, attribute, vertex);
+}
+
+void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) {
+ Inst(Opcode::SetAttribute, attribute, value, vertex);
+}
+
+F32 IREmitter::GetAttributeIndexed(const U32& phys_address) {
+ return GetAttributeIndexed(phys_address, Imm32(0));
+}
+
+F32 IREmitter::GetAttributeIndexed(const U32& phys_address, const U32& vertex) {
+ return Inst<F32>(Opcode::GetAttributeIndexed, phys_address, vertex);
+}
+
+void IREmitter::SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex) {
+ Inst(Opcode::SetAttributeIndexed, phys_address, value, vertex);
+}
+
+F32 IREmitter::GetPatch(Patch patch) {
+ return Inst<F32>(Opcode::GetPatch, patch);
+}
+
+void IREmitter::SetPatch(Patch patch, const F32& value) {
+ Inst(Opcode::SetPatch, patch, value);
+}
+
+void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) {
+ Inst(Opcode::SetFragColor, Imm32(index), Imm32(component), value);
+}
+
+void IREmitter::SetSampleMask(const U32& value) {
+ Inst(Opcode::SetSampleMask, value);
+}
+
+void IREmitter::SetFragDepth(const F32& value) {
+ Inst(Opcode::SetFragDepth, value);
+}
+
+U32 IREmitter::WorkgroupIdX() {
+ return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 0)};
+}
+
+U32 IREmitter::WorkgroupIdY() {
+ return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 1)};
+}
+
+U32 IREmitter::WorkgroupIdZ() {
+ return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 2)};
+}
+
+Value IREmitter::LocalInvocationId() {
+ return Inst(Opcode::LocalInvocationId);
+}
+
+U32 IREmitter::LocalInvocationIdX() {
+ return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 0)};
+}
+
+U32 IREmitter::LocalInvocationIdY() {
+ return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 1)};
+}
+
+U32 IREmitter::LocalInvocationIdZ() {
+ return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 2)};
+}
+
+U32 IREmitter::InvocationId() {
+ return Inst<U32>(Opcode::InvocationId);
+}
+
+U32 IREmitter::SampleId() {
+ return Inst<U32>(Opcode::SampleId);
+}
+
+U1 IREmitter::IsHelperInvocation() {
+ return Inst<U1>(Opcode::IsHelperInvocation);
+}
+
+F32 IREmitter::YDirection() {
+ return Inst<F32>(Opcode::YDirection);
+}
+
+U32 IREmitter::LaneId() {
+ return Inst<U32>(Opcode::LaneId);
+}
+
+U32 IREmitter::LoadGlobalU8(const U64& address) {
+ return Inst<U32>(Opcode::LoadGlobalU8, address);
+}
+
+U32 IREmitter::LoadGlobalS8(const U64& address) {
+ return Inst<U32>(Opcode::LoadGlobalS8, address);
+}
+
+U32 IREmitter::LoadGlobalU16(const U64& address) {
+ return Inst<U32>(Opcode::LoadGlobalU16, address);
+}
+
+U32 IREmitter::LoadGlobalS16(const U64& address) {
+ return Inst<U32>(Opcode::LoadGlobalS16, address);
+}
+
+U32 IREmitter::LoadGlobal32(const U64& address) {
+ return Inst<U32>(Opcode::LoadGlobal32, address);
+}
+
+Value IREmitter::LoadGlobal64(const U64& address) {
+ return Inst<Value>(Opcode::LoadGlobal64, address);
+}
+
+Value IREmitter::LoadGlobal128(const U64& address) {
+ return Inst<Value>(Opcode::LoadGlobal128, address);
+}
+
+void IREmitter::WriteGlobalU8(const U64& address, const U32& value) {
+ Inst(Opcode::WriteGlobalU8, address, value);
+}
+
+void IREmitter::WriteGlobalS8(const U64& address, const U32& value) {
+ Inst(Opcode::WriteGlobalS8, address, value);
+}
+
+void IREmitter::WriteGlobalU16(const U64& address, const U32& value) {
+ Inst(Opcode::WriteGlobalU16, address, value);
+}
+
+void IREmitter::WriteGlobalS16(const U64& address, const U32& value) {
+ Inst(Opcode::WriteGlobalS16, address, value);
+}
+
+void IREmitter::WriteGlobal32(const U64& address, const U32& value) {
+ Inst(Opcode::WriteGlobal32, address, value);
+}
+
+void IREmitter::WriteGlobal64(const U64& address, const IR::Value& vector) {
+ Inst(Opcode::WriteGlobal64, address, vector);
+}
+
+void IREmitter::WriteGlobal128(const U64& address, const IR::Value& vector) {
+ Inst(Opcode::WriteGlobal128, address, vector);
+}
+
+U32 IREmitter::LoadLocal(const IR::U32& word_offset) {
+ return Inst<U32>(Opcode::LoadLocal, word_offset);
+}
+
+void IREmitter::WriteLocal(const IR::U32& word_offset, const IR::U32& value) {
+ Inst(Opcode::WriteLocal, word_offset, value);
+}
+
+Value IREmitter::LoadShared(int bit_size, bool is_signed, const IR::U32& offset) {
+ switch (bit_size) {
+ case 8:
+ return Inst(is_signed ? Opcode::LoadSharedS8 : Opcode::LoadSharedU8, offset);
+ case 16:
+ return Inst(is_signed ? Opcode::LoadSharedS16 : Opcode::LoadSharedU16, offset);
+ case 32:
+ return Inst(Opcode::LoadSharedU32, offset);
+ case 64:
+ return Inst(Opcode::LoadSharedU64, offset);
+ case 128:
+ return Inst(Opcode::LoadSharedU128, offset);
+ }
+ throw InvalidArgument("Invalid bit size {}", bit_size);
+}
+
+void IREmitter::WriteShared(int bit_size, const IR::U32& offset, const IR::Value& value) {
+ switch (bit_size) {
+ case 8:
+ Inst(Opcode::WriteSharedU8, offset, value);
+ break;
+ case 16:
+ Inst(Opcode::WriteSharedU16, offset, value);
+ break;
+ case 32:
+ Inst(Opcode::WriteSharedU32, offset, value);
+ break;
+ case 64:
+ Inst(Opcode::WriteSharedU64, offset, value);
+ break;
+ case 128:
+ Inst(Opcode::WriteSharedU128, offset, value);
+ break;
+ default:
+ throw InvalidArgument("Invalid bit size {}", bit_size);
+ }
+}
+
+U1 IREmitter::GetZeroFromOp(const Value& op) {
+ return Inst<U1>(Opcode::GetZeroFromOp, op);
+}
+
+U1 IREmitter::GetSignFromOp(const Value& op) {
+ return Inst<U1>(Opcode::GetSignFromOp, op);
+}
+
+U1 IREmitter::GetCarryFromOp(const Value& op) {
+ return Inst<U1>(Opcode::GetCarryFromOp, op);
+}
+
+U1 IREmitter::GetOverflowFromOp(const Value& op) {
+ return Inst<U1>(Opcode::GetOverflowFromOp, op);
+}
+
+U1 IREmitter::GetSparseFromOp(const Value& op) {
+ return Inst<U1>(Opcode::GetSparseFromOp, op);
+}
+
+U1 IREmitter::GetInBoundsFromOp(const Value& op) {
+ return Inst<U1>(Opcode::GetInBoundsFromOp, op);
+}
+
+F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) {
+ if (a.Type() != b.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
+ }
+ switch (a.Type()) {
+ case Type::F16:
+ return Inst<F16>(Opcode::FPAdd16, Flags{control}, a, b);
+ case Type::F32:
+ return Inst<F32>(Opcode::FPAdd32, Flags{control}, a, b);
+ case Type::F64:
+ return Inst<F64>(Opcode::FPAdd64, Flags{control}, a, b);
+ default:
+ ThrowInvalidType(a.Type());
+ }
+}
+
+Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2) {
+ if (e1.Type() != e2.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type());
+ }
+ switch (e1.Type()) {
+ case Type::U32:
+ return Inst(Opcode::CompositeConstructU32x2, e1, e2);
+ case Type::F16:
+ return Inst(Opcode::CompositeConstructF16x2, e1, e2);
+ case Type::F32:
+ return Inst(Opcode::CompositeConstructF32x2, e1, e2);
+ case Type::F64:
+ return Inst(Opcode::CompositeConstructF64x2, e1, e2);
+ default:
+ ThrowInvalidType(e1.Type());
+ }
+}
+
+Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Value& e3) {
+ if (e1.Type() != e2.Type() || e1.Type() != e3.Type()) {
+ throw InvalidArgument("Mismatching types {}, {}, and {}", e1.Type(), e2.Type(), e3.Type());
+ }
+ switch (e1.Type()) {
+ case Type::U32:
+ return Inst(Opcode::CompositeConstructU32x3, e1, e2, e3);
+ case Type::F16:
+ return Inst(Opcode::CompositeConstructF16x3, e1, e2, e3);
+ case Type::F32:
+ return Inst(Opcode::CompositeConstructF32x3, e1, e2, e3);
+ case Type::F64:
+ return Inst(Opcode::CompositeConstructF64x3, e1, e2, e3);
+ default:
+ ThrowInvalidType(e1.Type());
+ }
+}
+
+Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Value& e3,
+ const Value& e4) {
+ if (e1.Type() != e2.Type() || e1.Type() != e3.Type() || e1.Type() != e4.Type()) {
+ throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(),
+ e3.Type(), e4.Type());
+ }
+ switch (e1.Type()) {
+ case Type::U32:
+ return Inst(Opcode::CompositeConstructU32x4, e1, e2, e3, e4);
+ case Type::F16:
+ return Inst(Opcode::CompositeConstructF16x4, e1, e2, e3, e4);
+ case Type::F32:
+ return Inst(Opcode::CompositeConstructF32x4, e1, e2, e3, e4);
+ case Type::F64:
+ return Inst(Opcode::CompositeConstructF64x4, e1, e2, e3, e4);
+ default:
+ ThrowInvalidType(e1.Type());
+ }
+}
+
+Value IREmitter::CompositeExtract(const Value& vector, size_t element) {
+ const auto read{[&](Opcode opcode, size_t limit) -> Value {
+ if (element >= limit) {
+ throw InvalidArgument("Out of bounds element {}", element);
+ }
+ return Inst(opcode, vector, Value{static_cast<u32>(element)});
+ }};
+ switch (vector.Type()) {
+ case Type::U32x2:
+ return read(Opcode::CompositeExtractU32x2, 2);
+ case Type::U32x3:
+ return read(Opcode::CompositeExtractU32x3, 3);
+ case Type::U32x4:
+ return read(Opcode::CompositeExtractU32x4, 4);
+ case Type::F16x2:
+ return read(Opcode::CompositeExtractF16x2, 2);
+ case Type::F16x3:
+ return read(Opcode::CompositeExtractF16x3, 3);
+ case Type::F16x4:
+ return read(Opcode::CompositeExtractF16x4, 4);
+ case Type::F32x2:
+ return read(Opcode::CompositeExtractF32x2, 2);
+ case Type::F32x3:
+ return read(Opcode::CompositeExtractF32x3, 3);
+ case Type::F32x4:
+ return read(Opcode::CompositeExtractF32x4, 4);
+ case Type::F64x2:
+ return read(Opcode::CompositeExtractF64x2, 2);
+ case Type::F64x3:
+ return read(Opcode::CompositeExtractF64x3, 3);
+ case Type::F64x4:
+ return read(Opcode::CompositeExtractF64x4, 4);
+ default:
+ ThrowInvalidType(vector.Type());
+ }
+}
+
+Value IREmitter::CompositeInsert(const Value& vector, const Value& object, size_t element) {
+ const auto insert{[&](Opcode opcode, size_t limit) {
+ if (element >= limit) {
+ throw InvalidArgument("Out of bounds element {}", element);
+ }
+ return Inst(opcode, vector, object, Value{static_cast<u32>(element)});
+ }};
+ switch (vector.Type()) {
+ case Type::U32x2:
+ return insert(Opcode::CompositeInsertU32x2, 2);
+ case Type::U32x3:
+ return insert(Opcode::CompositeInsertU32x3, 3);
+ case Type::U32x4:
+ return insert(Opcode::CompositeInsertU32x4, 4);
+ case Type::F16x2:
+ return insert(Opcode::CompositeInsertF16x2, 2);
+ case Type::F16x3:
+ return insert(Opcode::CompositeInsertF16x3, 3);
+ case Type::F16x4:
+ return insert(Opcode::CompositeInsertF16x4, 4);
+ case Type::F32x2:
+ return insert(Opcode::CompositeInsertF32x2, 2);
+ case Type::F32x3:
+ return insert(Opcode::CompositeInsertF32x3, 3);
+ case Type::F32x4:
+ return insert(Opcode::CompositeInsertF32x4, 4);
+ case Type::F64x2:
+ return insert(Opcode::CompositeInsertF64x2, 2);
+ case Type::F64x3:
+ return insert(Opcode::CompositeInsertF64x3, 3);
+ case Type::F64x4:
+ return insert(Opcode::CompositeInsertF64x4, 4);
+ default:
+ ThrowInvalidType(vector.Type());
+ }
+}
+
+Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) {
+ if (true_value.Type() != false_value.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type());
+ }
+ switch (true_value.Type()) {
+ case Type::U1:
+ return Inst(Opcode::SelectU1, condition, true_value, false_value);
+ case Type::U8:
+ return Inst(Opcode::SelectU8, condition, true_value, false_value);
+ case Type::U16:
+ return Inst(Opcode::SelectU16, condition, true_value, false_value);
+ case Type::U32:
+ return Inst(Opcode::SelectU32, condition, true_value, false_value);
+ case Type::U64:
+ return Inst(Opcode::SelectU64, condition, true_value, false_value);
+ case Type::F32:
+ return Inst(Opcode::SelectF32, condition, true_value, false_value);
+ case Type::F64:
+ return Inst(Opcode::SelectF64, condition, true_value, false_value);
+ default:
+ throw InvalidArgument("Invalid type {}", true_value.Type());
+ }
+}
+
+template <>
+IR::U32 IREmitter::BitCast<IR::U32, IR::F32>(const IR::F32& value) {
+ return Inst<IR::U32>(Opcode::BitCastU32F32, value);
+}
+
+template <>
+IR::F32 IREmitter::BitCast<IR::F32, IR::U32>(const IR::U32& value) {
+ return Inst<IR::F32>(Opcode::BitCastF32U32, value);
+}
+
+template <>
+IR::U16 IREmitter::BitCast<IR::U16, IR::F16>(const IR::F16& value) {
+ return Inst<IR::U16>(Opcode::BitCastU16F16, value);
+}
+
+template <>
+IR::F16 IREmitter::BitCast<IR::F16, IR::U16>(const IR::U16& value) {
+ return Inst<IR::F16>(Opcode::BitCastF16U16, value);
+}
+
+template <>
+IR::U64 IREmitter::BitCast<IR::U64, IR::F64>(const IR::F64& value) {
+ return Inst<IR::U64>(Opcode::BitCastU64F64, value);
+}
+
+template <>
+IR::F64 IREmitter::BitCast<IR::F64, IR::U64>(const IR::U64& value) {
+ return Inst<IR::F64>(Opcode::BitCastF64U64, value);
+}
+
+U64 IREmitter::PackUint2x32(const Value& vector) {
+ return Inst<U64>(Opcode::PackUint2x32, vector);
+}
+
+Value IREmitter::UnpackUint2x32(const U64& value) {
+ return Inst<Value>(Opcode::UnpackUint2x32, value);
+}
+
+U32 IREmitter::PackFloat2x16(const Value& vector) {
+ return Inst<U32>(Opcode::PackFloat2x16, vector);
+}
+
+Value IREmitter::UnpackFloat2x16(const U32& value) {
+ return Inst(Opcode::UnpackFloat2x16, value);
+}
+
+U32 IREmitter::PackHalf2x16(const Value& vector) {
+ return Inst<U32>(Opcode::PackHalf2x16, vector);
+}
+
+Value IREmitter::UnpackHalf2x16(const U32& value) {
+ return Inst(Opcode::UnpackHalf2x16, value);
+}
+
+F64 IREmitter::PackDouble2x32(const Value& vector) {
+ return Inst<F64>(Opcode::PackDouble2x32, vector);
+}
+
+Value IREmitter::UnpackDouble2x32(const F64& value) {
+ return Inst<Value>(Opcode::UnpackDouble2x32, value);
+}
+
+F16F32F64 IREmitter::FPMul(const F16F32F64& a, const F16F32F64& b, FpControl control) {
+ if (a.Type() != b.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
+ }
+ switch (a.Type()) {
+ case Type::F16:
+ return Inst<F16>(Opcode::FPMul16, Flags{control}, a, b);
+ case Type::F32:
+ return Inst<F32>(Opcode::FPMul32, Flags{control}, a, b);
+ case Type::F64:
+ return Inst<F64>(Opcode::FPMul64, Flags{control}, a, b);
+ default:
+ ThrowInvalidType(a.Type());
+ }
+}
+
+F16F32F64 IREmitter::FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F64& c,
+ FpControl control) {
+ if (a.Type() != b.Type() || a.Type() != c.Type()) {
+ throw InvalidArgument("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type());
+ }
+ switch (a.Type()) {
+ case Type::F16:
+ return Inst<F16>(Opcode::FPFma16, Flags{control}, a, b, c);
+ case Type::F32:
+ return Inst<F32>(Opcode::FPFma32, Flags{control}, a, b, c);
+ case Type::F64:
+ return Inst<F64>(Opcode::FPFma64, Flags{control}, a, b, c);
+ default:
+ ThrowInvalidType(a.Type());
+ }
+}
+
+F16F32F64 IREmitter::FPAbs(const F16F32F64& value) {
+ switch (value.Type()) {
+ case Type::F16:
+ return Inst<F16>(Opcode::FPAbs16, value);
+ case Type::F32:
+ return Inst<F32>(Opcode::FPAbs32, value);
+ case Type::F64:
+ return Inst<F64>(Opcode::FPAbs64, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+F16F32F64 IREmitter::FPNeg(const F16F32F64& value) {
+ switch (value.Type()) {
+ case Type::F16:
+ return Inst<F16>(Opcode::FPNeg16, value);
+ case Type::F32:
+ return Inst<F32>(Opcode::FPNeg32, value);
+ case Type::F64:
+ return Inst<F64>(Opcode::FPNeg64, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+F16F32F64 IREmitter::FPAbsNeg(const F16F32F64& value, bool abs, bool neg) {
+ F16F32F64 result{value};
+ if (abs) {
+ result = FPAbs(result);
+ }
+ if (neg) {
+ result = FPNeg(result);
+ }
+ return result;
+}
+
+F32 IREmitter::FPCos(const F32& value) {
+ return Inst<F32>(Opcode::FPCos, value);
+}
+
+F32 IREmitter::FPSin(const F32& value) {
+ return Inst<F32>(Opcode::FPSin, value);
+}
+
+F32 IREmitter::FPExp2(const F32& value) {
+ return Inst<F32>(Opcode::FPExp2, value);
+}
+
+F32 IREmitter::FPLog2(const F32& value) {
+ return Inst<F32>(Opcode::FPLog2, value);
+}
+
+F32F64 IREmitter::FPRecip(const F32F64& value) {
+ switch (value.Type()) {
+ case Type::F32:
+ return Inst<F32>(Opcode::FPRecip32, value);
+ case Type::F64:
+ return Inst<F64>(Opcode::FPRecip64, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+F32F64 IREmitter::FPRecipSqrt(const F32F64& value) {
+ switch (value.Type()) {
+ case Type::F32:
+ return Inst<F32>(Opcode::FPRecipSqrt32, value);
+ case Type::F64:
+ return Inst<F64>(Opcode::FPRecipSqrt64, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+F32 IREmitter::FPSqrt(const F32& value) {
+ return Inst<F32>(Opcode::FPSqrt, value);
+}
+
+F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) {
+ switch (value.Type()) {
+ case Type::F16:
+ return Inst<F16>(Opcode::FPSaturate16, value);
+ case Type::F32:
+ return Inst<F32>(Opcode::FPSaturate32, value);
+ case Type::F64:
+ return Inst<F64>(Opcode::FPSaturate64, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+F16F32F64 IREmitter::FPClamp(const F16F32F64& value, const F16F32F64& min_value,
+ const F16F32F64& max_value) {
+ if (value.Type() != min_value.Type() || value.Type() != max_value.Type()) {
+ throw InvalidArgument("Mismatching types {}, {}, and {}", value.Type(), min_value.Type(),
+ max_value.Type());
+ }
+ switch (value.Type()) {
+ case Type::F16:
+ return Inst<F16>(Opcode::FPClamp16, value, min_value, max_value);
+ case Type::F32:
+ return Inst<F32>(Opcode::FPClamp32, value, min_value, max_value);
+ case Type::F64:
+ return Inst<F64>(Opcode::FPClamp64, value, min_value, max_value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value, FpControl control) {
+ switch (value.Type()) {
+ case Type::F16:
+ return Inst<F16>(Opcode::FPRoundEven16, Flags{control}, value);
+ case Type::F32:
+ return Inst<F32>(Opcode::FPRoundEven32, Flags{control}, value);
+ case Type::F64:
+ return Inst<F64>(Opcode::FPRoundEven64, Flags{control}, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+F16F32F64 IREmitter::FPFloor(const F16F32F64& value, FpControl control) {
+ switch (value.Type()) {
+ case Type::F16:
+ return Inst<F16>(Opcode::FPFloor16, Flags{control}, value);
+ case Type::F32:
+ return Inst<F32>(Opcode::FPFloor32, Flags{control}, value);
+ case Type::F64:
+ return Inst<F64>(Opcode::FPFloor64, Flags{control}, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+F16F32F64 IREmitter::FPCeil(const F16F32F64& value, FpControl control) {
+ switch (value.Type()) {
+ case Type::F16:
+ return Inst<F16>(Opcode::FPCeil16, Flags{control}, value);
+ case Type::F32:
+ return Inst<F32>(Opcode::FPCeil32, Flags{control}, value);
+ case Type::F64:
+ return Inst<F64>(Opcode::FPCeil64, Flags{control}, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+F16F32F64 IREmitter::FPTrunc(const F16F32F64& value, FpControl control) {
+ switch (value.Type()) {
+ case Type::F16:
+ return Inst<F16>(Opcode::FPTrunc16, Flags{control}, value);
+ case Type::F32:
+ return Inst<F32>(Opcode::FPTrunc32, Flags{control}, value);
+ case Type::F64:
+ return Inst<F64>(Opcode::FPTrunc64, Flags{control}, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+U1 IREmitter::FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, bool ordered) {
+ if (lhs.Type() != rhs.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+ }
+ switch (lhs.Type()) {
+ case Type::F16:
+ return Inst<U1>(ordered ? Opcode::FPOrdEqual16 : Opcode::FPUnordEqual16, Flags{control},
+ lhs, rhs);
+ case Type::F32:
+ return Inst<U1>(ordered ? Opcode::FPOrdEqual32 : Opcode::FPUnordEqual32, Flags{control},
+ lhs, rhs);
+ case Type::F64:
+ return Inst<U1>(ordered ? Opcode::FPOrdEqual64 : Opcode::FPUnordEqual64, Flags{control},
+ lhs, rhs);
+ default:
+ ThrowInvalidType(lhs.Type());
+ }
+}
+
+U1 IREmitter::FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control,
+ bool ordered) {
+ if (lhs.Type() != rhs.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+ }
+ switch (lhs.Type()) {
+ case Type::F16:
+ return Inst<U1>(ordered ? Opcode::FPOrdNotEqual16 : Opcode::FPUnordNotEqual16,
+ Flags{control}, lhs, rhs);
+ case Type::F32:
+ return Inst<U1>(ordered ? Opcode::FPOrdNotEqual32 : Opcode::FPUnordNotEqual32,
+ Flags{control}, lhs, rhs);
+ case Type::F64:
+ return Inst<U1>(ordered ? Opcode::FPOrdNotEqual64 : Opcode::FPUnordNotEqual64,
+ Flags{control}, lhs, rhs);
+ default:
+ ThrowInvalidType(lhs.Type());
+ }
+}
+
+U1 IREmitter::FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control,
+ bool ordered) {
+ if (lhs.Type() != rhs.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+ }
+ switch (lhs.Type()) {
+ case Type::F16:
+ return Inst<U1>(ordered ? Opcode::FPOrdLessThan16 : Opcode::FPUnordLessThan16,
+ Flags{control}, lhs, rhs);
+ case Type::F32:
+ return Inst<U1>(ordered ? Opcode::FPOrdLessThan32 : Opcode::FPUnordLessThan32,
+ Flags{control}, lhs, rhs);
+ case Type::F64:
+ return Inst<U1>(ordered ? Opcode::FPOrdLessThan64 : Opcode::FPUnordLessThan64,
+ Flags{control}, lhs, rhs);
+ default:
+ ThrowInvalidType(lhs.Type());
+ }
+}
+
+U1 IREmitter::FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control,
+ bool ordered) {
+ if (lhs.Type() != rhs.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+ }
+ switch (lhs.Type()) {
+ case Type::F16:
+ return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan16 : Opcode::FPUnordGreaterThan16,
+ Flags{control}, lhs, rhs);
+ case Type::F32:
+ return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan32 : Opcode::FPUnordGreaterThan32,
+ Flags{control}, lhs, rhs);
+ case Type::F64:
+ return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan64 : Opcode::FPUnordGreaterThan64,
+ Flags{control}, lhs, rhs);
+ default:
+ ThrowInvalidType(lhs.Type());
+ }
+}
+
+U1 IREmitter::FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control,
+ bool ordered) {
+ if (lhs.Type() != rhs.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+ }
+ switch (lhs.Type()) {
+ case Type::F16:
+ return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual16 : Opcode::FPUnordLessThanEqual16,
+ Flags{control}, lhs, rhs);
+ case Type::F32:
+ return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual32 : Opcode::FPUnordLessThanEqual32,
+ Flags{control}, lhs, rhs);
+ case Type::F64:
+ return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual64 : Opcode::FPUnordLessThanEqual64,
+ Flags{control}, lhs, rhs);
+ default:
+ ThrowInvalidType(lhs.Type());
+ }
+}
+
+U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control,
+ bool ordered) {
+ if (lhs.Type() != rhs.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+ }
+ switch (lhs.Type()) {
+ case Type::F16:
+ return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual16
+ : Opcode::FPUnordGreaterThanEqual16,
+ Flags{control}, lhs, rhs);
+ case Type::F32:
+ return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual32
+ : Opcode::FPUnordGreaterThanEqual32,
+ Flags{control}, lhs, rhs);
+ case Type::F64:
+ return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual64
+ : Opcode::FPUnordGreaterThanEqual64,
+ Flags{control}, lhs, rhs);
+ default:
+ ThrowInvalidType(lhs.Type());
+ }
+}
+
+U1 IREmitter::FPIsNan(const F16F32F64& value) {
+ switch (value.Type()) {
+ case Type::F16:
+ return Inst<U1>(Opcode::FPIsNan16, value);
+ case Type::F32:
+ return Inst<U1>(Opcode::FPIsNan32, value);
+ case Type::F64:
+ return Inst<U1>(Opcode::FPIsNan64, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+U1 IREmitter::FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs) {
+ if (lhs.Type() != rhs.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+ }
+ return LogicalAnd(LogicalNot(FPIsNan(lhs)), LogicalNot(FPIsNan(rhs)));
+}
+
+U1 IREmitter::FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs) {
+ if (lhs.Type() != rhs.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+ }
+ return LogicalOr(FPIsNan(lhs), FPIsNan(rhs));
+}
+
+F32F64 IREmitter::FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control) {
+ if (lhs.Type() != rhs.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+ }
+ switch (lhs.Type()) {
+ case Type::F32:
+ return Inst<F32>(Opcode::FPMax32, Flags{control}, lhs, rhs);
+ case Type::F64:
+ return Inst<F64>(Opcode::FPMax64, Flags{control}, lhs, rhs);
+ default:
+ ThrowInvalidType(lhs.Type());
+ }
+}
+
+F32F64 IREmitter::FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control) {
+ if (lhs.Type() != rhs.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+ }
+ switch (lhs.Type()) {
+ case Type::F32:
+ return Inst<F32>(Opcode::FPMin32, Flags{control}, lhs, rhs);
+ case Type::F64:
+ return Inst<F64>(Opcode::FPMin64, Flags{control}, lhs, rhs);
+ default:
+ ThrowInvalidType(lhs.Type());
+ }
+}
+
+U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) {
+ if (a.Type() != b.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
+ }
+ switch (a.Type()) {
+ case Type::U32:
+ return Inst<U32>(Opcode::IAdd32, a, b);
+ case Type::U64:
+ return Inst<U64>(Opcode::IAdd64, a, b);
+ default:
+ ThrowInvalidType(a.Type());
+ }
+}
+
+U32U64 IREmitter::ISub(const U32U64& a, const U32U64& b) {
+ if (a.Type() != b.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
+ }
+ switch (a.Type()) {
+ case Type::U32:
+ return Inst<U32>(Opcode::ISub32, a, b);
+ case Type::U64:
+ return Inst<U64>(Opcode::ISub64, a, b);
+ default:
+ ThrowInvalidType(a.Type());
+ }
+}
+
+U32 IREmitter::IMul(const U32& a, const U32& b) {
+ return Inst<U32>(Opcode::IMul32, a, b);
+}
+
+U32U64 IREmitter::INeg(const U32U64& value) {
+ switch (value.Type()) {
+ case Type::U32:
+ return Inst<U32>(Opcode::INeg32, value);
+ case Type::U64:
+ return Inst<U64>(Opcode::INeg64, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+U32 IREmitter::IAbs(const U32& value) {
+ return Inst<U32>(Opcode::IAbs32, value);
+}
+
+U32U64 IREmitter::ShiftLeftLogical(const U32U64& base, const U32& shift) {
+ switch (base.Type()) {
+ case Type::U32:
+ return Inst<U32>(Opcode::ShiftLeftLogical32, base, shift);
+ case Type::U64:
+ return Inst<U64>(Opcode::ShiftLeftLogical64, base, shift);
+ default:
+ ThrowInvalidType(base.Type());
+ }
+}
+
+U32U64 IREmitter::ShiftRightLogical(const U32U64& base, const U32& shift) {
+ switch (base.Type()) {
+ case Type::U32:
+ return Inst<U32>(Opcode::ShiftRightLogical32, base, shift);
+ case Type::U64:
+ return Inst<U64>(Opcode::ShiftRightLogical64, base, shift);
+ default:
+ ThrowInvalidType(base.Type());
+ }
+}
+
+U32U64 IREmitter::ShiftRightArithmetic(const U32U64& base, const U32& shift) {
+ switch (base.Type()) {
+ case Type::U32:
+ return Inst<U32>(Opcode::ShiftRightArithmetic32, base, shift);
+ case Type::U64:
+ return Inst<U64>(Opcode::ShiftRightArithmetic64, base, shift);
+ default:
+ ThrowInvalidType(base.Type());
+ }
+}
+
+U32 IREmitter::BitwiseAnd(const U32& a, const U32& b) {
+ return Inst<U32>(Opcode::BitwiseAnd32, a, b);
+}
+
+U32 IREmitter::BitwiseOr(const U32& a, const U32& b) {
+ return Inst<U32>(Opcode::BitwiseOr32, a, b);
+}
+
+U32 IREmitter::BitwiseXor(const U32& a, const U32& b) {
+ return Inst<U32>(Opcode::BitwiseXor32, a, b);
+}
+
+U32 IREmitter::BitFieldInsert(const U32& base, const U32& insert, const U32& offset,
+ const U32& count) {
+ return Inst<U32>(Opcode::BitFieldInsert, base, insert, offset, count);
+}
+
+U32 IREmitter::BitFieldExtract(const U32& base, const U32& offset, const U32& count,
+ bool is_signed) {
+ return Inst<U32>(is_signed ? Opcode::BitFieldSExtract : Opcode::BitFieldUExtract, base, offset,
+ count);
+}
+
+U32 IREmitter::BitReverse(const U32& value) {
+ return Inst<U32>(Opcode::BitReverse32, value);
+}
+
+U32 IREmitter::BitCount(const U32& value) {
+ return Inst<U32>(Opcode::BitCount32, value);
+}
+
+U32 IREmitter::BitwiseNot(const U32& value) {
+ return Inst<U32>(Opcode::BitwiseNot32, value);
+}
+
+U32 IREmitter::FindSMsb(const U32& value) {
+ return Inst<U32>(Opcode::FindSMsb32, value);
+}
+
+U32 IREmitter::FindUMsb(const U32& value) {
+ return Inst<U32>(Opcode::FindUMsb32, value);
+}
+
+U32 IREmitter::SMin(const U32& a, const U32& b) {
+ return Inst<U32>(Opcode::SMin32, a, b);
+}
+
+U32 IREmitter::UMin(const U32& a, const U32& b) {
+ return Inst<U32>(Opcode::UMin32, a, b);
+}
+
+U32 IREmitter::IMin(const U32& a, const U32& b, bool is_signed) {
+ return is_signed ? SMin(a, b) : UMin(a, b);
+}
+
+U32 IREmitter::SMax(const U32& a, const U32& b) {
+ return Inst<U32>(Opcode::SMax32, a, b);
+}
+
+U32 IREmitter::UMax(const U32& a, const U32& b) {
+ return Inst<U32>(Opcode::UMax32, a, b);
+}
+
+U32 IREmitter::IMax(const U32& a, const U32& b, bool is_signed) {
+ return is_signed ? SMax(a, b) : UMax(a, b);
+}
+
+U32 IREmitter::SClamp(const U32& value, const U32& min, const U32& max) {
+ return Inst<U32>(Opcode::SClamp32, value, min, max);
+}
+
+U32 IREmitter::UClamp(const U32& value, const U32& min, const U32& max) {
+ return Inst<U32>(Opcode::UClamp32, value, min, max);
+}
+
+U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) {
+ return Inst<U1>(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs);
+}
+
+U1 IREmitter::IEqual(const U32U64& lhs, const U32U64& rhs) {
+ if (lhs.Type() != rhs.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+ }
+ switch (lhs.Type()) {
+ case Type::U32:
+ return Inst<U1>(Opcode::IEqual, lhs, rhs);
+ case Type::U64: {
+ // Manually compare the unpacked values
+ const Value lhs_vector{UnpackUint2x32(lhs)};
+ const Value rhs_vector{UnpackUint2x32(rhs)};
+ return LogicalAnd(IEqual(IR::U32{CompositeExtract(lhs_vector, 0)},
+ IR::U32{CompositeExtract(rhs_vector, 0)}),
+ IEqual(IR::U32{CompositeExtract(lhs_vector, 1)},
+ IR::U32{CompositeExtract(rhs_vector, 1)}));
+ }
+ default:
+ ThrowInvalidType(lhs.Type());
+ }
+}
+
+U1 IREmitter::ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed) {
+ return Inst<U1>(is_signed ? Opcode::SLessThanEqual : Opcode::ULessThanEqual, lhs, rhs);
+}
+
+U1 IREmitter::IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed) {
+ return Inst<U1>(is_signed ? Opcode::SGreaterThan : Opcode::UGreaterThan, lhs, rhs);
+}
+
+U1 IREmitter::INotEqual(const U32& lhs, const U32& rhs) {
+ return Inst<U1>(Opcode::INotEqual, lhs, rhs);
+}
+
+U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed) {
+ return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs);
+}
+
+U32 IREmitter::SharedAtomicIAdd(const U32& pointer_offset, const U32& value) {
+ return Inst<U32>(Opcode::SharedAtomicIAdd32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicSMin(const U32& pointer_offset, const U32& value) {
+ return Inst<U32>(Opcode::SharedAtomicSMin32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicUMin(const U32& pointer_offset, const U32& value) {
+ return Inst<U32>(Opcode::SharedAtomicUMin32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed) {
+ return is_signed ? SharedAtomicSMin(pointer_offset, value)
+ : SharedAtomicUMin(pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicSMax(const U32& pointer_offset, const U32& value) {
+ return Inst<U32>(Opcode::SharedAtomicSMax32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicUMax(const U32& pointer_offset, const U32& value) {
+ return Inst<U32>(Opcode::SharedAtomicUMax32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed) {
+ return is_signed ? SharedAtomicSMax(pointer_offset, value)
+ : SharedAtomicUMax(pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicInc(const U32& pointer_offset, const U32& value) {
+ return Inst<U32>(Opcode::SharedAtomicInc32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicDec(const U32& pointer_offset, const U32& value) {
+ return Inst<U32>(Opcode::SharedAtomicDec32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicAnd(const U32& pointer_offset, const U32& value) {
+ return Inst<U32>(Opcode::SharedAtomicAnd32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicOr(const U32& pointer_offset, const U32& value) {
+ return Inst<U32>(Opcode::SharedAtomicOr32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicXor(const U32& pointer_offset, const U32& value) {
+ return Inst<U32>(Opcode::SharedAtomicXor32, pointer_offset, value);
+}
+
+U32U64 IREmitter::SharedAtomicExchange(const U32& pointer_offset, const U32U64& value) {
+ switch (value.Type()) {
+ case Type::U32:
+ return Inst<U32>(Opcode::SharedAtomicExchange32, pointer_offset, value);
+ case Type::U64:
+ return Inst<U64>(Opcode::SharedAtomicExchange64, pointer_offset, value);
+ default:
+        ThrowInvalidType(value.Type());
+ }
+}
+
+U32U64 IREmitter::GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value) {
+ switch (value.Type()) {
+ case Type::U32:
+ return Inst<U32>(Opcode::GlobalAtomicIAdd32, pointer_offset, value);
+ case Type::U64:
+ return Inst<U64>(Opcode::GlobalAtomicIAdd64, pointer_offset, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+U32U64 IREmitter::GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value) {
+ switch (value.Type()) {
+ case Type::U32:
+ return Inst<U32>(Opcode::GlobalAtomicSMin32, pointer_offset, value);
+ case Type::U64:
+ return Inst<U64>(Opcode::GlobalAtomicSMin64, pointer_offset, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+U32U64 IREmitter::GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value) {
+ switch (value.Type()) {
+ case Type::U32:
+ return Inst<U32>(Opcode::GlobalAtomicUMin32, pointer_offset, value);
+ case Type::U64:
+ return Inst<U64>(Opcode::GlobalAtomicUMin64, pointer_offset, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+U32U64 IREmitter::GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value, bool is_signed) {
+ return is_signed ? GlobalAtomicSMin(pointer_offset, value)
+ : GlobalAtomicUMin(pointer_offset, value);
+}
+
+U32U64 IREmitter::GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value) {
+ switch (value.Type()) {
+ case Type::U32:
+ return Inst<U32>(Opcode::GlobalAtomicSMax32, pointer_offset, value);
+ case Type::U64:
+ return Inst<U64>(Opcode::GlobalAtomicSMax64, pointer_offset, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+U32U64 IREmitter::GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value) {
+ switch (value.Type()) {
+ case Type::U32:
+ return Inst<U32>(Opcode::GlobalAtomicUMax32, pointer_offset, value);
+ case Type::U64:
+ return Inst<U64>(Opcode::GlobalAtomicUMax64, pointer_offset, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+U32U64 IREmitter::GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value, bool is_signed) {
+ return is_signed ? GlobalAtomicSMax(pointer_offset, value)
+ : GlobalAtomicUMax(pointer_offset, value);
+}
+
+U32 IREmitter::GlobalAtomicInc(const U64& pointer_offset, const U32& value) {
+ return Inst<U32>(Opcode::GlobalAtomicInc32, pointer_offset, value);
+}
+
+U32 IREmitter::GlobalAtomicDec(const U64& pointer_offset, const U32& value) {
+ return Inst<U32>(Opcode::GlobalAtomicDec32, pointer_offset, value);
+}
+
+U32U64 IREmitter::GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value) {
+ switch (value.Type()) {
+ case Type::U32:
+ return Inst<U32>(Opcode::GlobalAtomicAnd32, pointer_offset, value);
+ case Type::U64:
+ return Inst<U64>(Opcode::GlobalAtomicAnd64, pointer_offset, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+U32U64 IREmitter::GlobalAtomicOr(const U64& pointer_offset, const U32U64& value) {
+ switch (value.Type()) {
+ case Type::U32:
+ return Inst<U32>(Opcode::GlobalAtomicOr32, pointer_offset, value);
+ case Type::U64:
+ return Inst<U64>(Opcode::GlobalAtomicOr64, pointer_offset, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+U32U64 IREmitter::GlobalAtomicXor(const U64& pointer_offset, const U32U64& value) {
+ switch (value.Type()) {
+ case Type::U32:
+ return Inst<U32>(Opcode::GlobalAtomicXor32, pointer_offset, value);
+ case Type::U64:
+ return Inst<U64>(Opcode::GlobalAtomicXor64, pointer_offset, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+U32U64 IREmitter::GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value) {
+ switch (value.Type()) {
+ case Type::U32:
+ return Inst<U32>(Opcode::GlobalAtomicExchange32, pointer_offset, value);
+ case Type::U64:
+ return Inst<U64>(Opcode::GlobalAtomicExchange64, pointer_offset, value);
+ default:
+        ThrowInvalidType(value.Type());
+ }
+}
+
+F32 IREmitter::GlobalAtomicF32Add(const U64& pointer_offset, const Value& value,
+ const FpControl control) {
+ return Inst<F32>(Opcode::GlobalAtomicAddF32, Flags{control}, pointer_offset, value);
+}
+
+Value IREmitter::GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value,
+ const FpControl control) {
+ return Inst(Opcode::GlobalAtomicAddF16x2, Flags{control}, pointer_offset, value);
+}
+
+Value IREmitter::GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value,
+ const FpControl control) {
+ return Inst(Opcode::GlobalAtomicMinF16x2, Flags{control}, pointer_offset, value);
+}
+
+Value IREmitter::GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value,
+ const FpControl control) {
+ return Inst(Opcode::GlobalAtomicMaxF16x2, Flags{control}, pointer_offset, value);
+}
+
+U1 IREmitter::LogicalOr(const U1& a, const U1& b) {
+ return Inst<U1>(Opcode::LogicalOr, a, b);
+}
+
+U1 IREmitter::LogicalAnd(const U1& a, const U1& b) {
+ return Inst<U1>(Opcode::LogicalAnd, a, b);
+}
+
+U1 IREmitter::LogicalXor(const U1& a, const U1& b) {
+ return Inst<U1>(Opcode::LogicalXor, a, b);
+}
+
+U1 IREmitter::LogicalNot(const U1& value) {
+ return Inst<U1>(Opcode::LogicalNot, value);
+}
+
+U32U64 IREmitter::ConvertFToS(size_t bitsize, const F16F32F64& value) {
+ switch (bitsize) {
+ case 16:
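+        // 16-bit results are held in 32-bit values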
+ switch (value.Type()) {
+ case Type::F16:
+ return Inst<U32>(Opcode::ConvertS16F16, value);
+ case Type::F32:
+ return Inst<U32>(Opcode::ConvertS16F32, value);
+ case Type::F64:
+ return Inst<U32>(Opcode::ConvertS16F64, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+ case 32:
+ switch (value.Type()) {
+ case Type::F16:
+ return Inst<U32>(Opcode::ConvertS32F16, value);
+ case Type::F32:
+ return Inst<U32>(Opcode::ConvertS32F32, value);
+ case Type::F64:
+ return Inst<U32>(Opcode::ConvertS32F64, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+ case 64:
+ switch (value.Type()) {
+ case Type::F16:
+ return Inst<U64>(Opcode::ConvertS64F16, value);
+ case Type::F32:
+ return Inst<U64>(Opcode::ConvertS64F32, value);
+ case Type::F64:
+ return Inst<U64>(Opcode::ConvertS64F64, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+ default:
+ throw InvalidArgument("Invalid destination bitsize {}", bitsize);
+ }
+}
+
+U32U64 IREmitter::ConvertFToU(size_t bitsize, const F16F32F64& value) {
+ switch (bitsize) {
+ case 16:
+ switch (value.Type()) {
+ case Type::F16:
+ return Inst<U32>(Opcode::ConvertU16F16, value);
+ case Type::F32:
+ return Inst<U32>(Opcode::ConvertU16F32, value);
+ case Type::F64:
+ return Inst<U32>(Opcode::ConvertU16F64, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+ case 32:
+ switch (value.Type()) {
+ case Type::F16:
+ return Inst<U32>(Opcode::ConvertU32F16, value);
+ case Type::F32:
+ return Inst<U32>(Opcode::ConvertU32F32, value);
+ case Type::F64:
+ return Inst<U32>(Opcode::ConvertU32F64, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+ case 64:
+ switch (value.Type()) {
+ case Type::F16:
+ return Inst<U64>(Opcode::ConvertU64F16, value);
+ case Type::F32:
+ return Inst<U64>(Opcode::ConvertU64F32, value);
+ case Type::F64:
+ return Inst<U64>(Opcode::ConvertU64F64, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+ default:
+ throw InvalidArgument("Invalid destination bitsize {}", bitsize);
+ }
+}
+
+U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value) {
+ return is_signed ? ConvertFToS(bitsize, value) : ConvertFToU(bitsize, value);
+}
+
+F16F32F64 IREmitter::ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value,
+ FpControl control) {
+ switch (dest_bitsize) {
+ case 16:
+ switch (src_bitsize) {
+ case 8:
+ return Inst<F16>(Opcode::ConvertF16S8, Flags{control}, value);
+ case 16:
+ return Inst<F16>(Opcode::ConvertF16S16, Flags{control}, value);
+ case 32:
+ return Inst<F16>(Opcode::ConvertF16S32, Flags{control}, value);
+ case 64:
+ return Inst<F16>(Opcode::ConvertF16S64, Flags{control}, value);
+ }
+ break;
+ case 32:
+ switch (src_bitsize) {
+ case 8:
+ return Inst<F32>(Opcode::ConvertF32S8, Flags{control}, value);
+ case 16:
+ return Inst<F32>(Opcode::ConvertF32S16, Flags{control}, value);
+ case 32:
+ return Inst<F32>(Opcode::ConvertF32S32, Flags{control}, value);
+ case 64:
+ return Inst<F32>(Opcode::ConvertF32S64, Flags{control}, value);
+ }
+ break;
+ case 64:
+ switch (src_bitsize) {
+ case 8:
+ return Inst<F64>(Opcode::ConvertF64S8, Flags{control}, value);
+ case 16:
+ return Inst<F64>(Opcode::ConvertF64S16, Flags{control}, value);
+ case 32:
+ return Inst<F64>(Opcode::ConvertF64S32, Flags{control}, value);
+ case 64:
+ return Inst<F64>(Opcode::ConvertF64S64, Flags{control}, value);
+ }
+ break;
+ }
+ throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize);
+}
+
+F16F32F64 IREmitter::ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value,
+ FpControl control) {
+ switch (dest_bitsize) {
+ case 16:
+ switch (src_bitsize) {
+ case 8:
+ return Inst<F16>(Opcode::ConvertF16U8, Flags{control}, value);
+ case 16:
+ return Inst<F16>(Opcode::ConvertF16U16, Flags{control}, value);
+ case 32:
+ return Inst<F16>(Opcode::ConvertF16U32, Flags{control}, value);
+ case 64:
+ return Inst<F16>(Opcode::ConvertF16U64, Flags{control}, value);
+ }
+ break;
+ case 32:
+ switch (src_bitsize) {
+ case 8:
+ return Inst<F32>(Opcode::ConvertF32U8, Flags{control}, value);
+ case 16:
+ return Inst<F32>(Opcode::ConvertF32U16, Flags{control}, value);
+ case 32:
+ return Inst<F32>(Opcode::ConvertF32U32, Flags{control}, value);
+ case 64:
+ return Inst<F32>(Opcode::ConvertF32U64, Flags{control}, value);
+ }
+ break;
+ case 64:
+ switch (src_bitsize) {
+ case 8:
+ return Inst<F64>(Opcode::ConvertF64U8, Flags{control}, value);
+ case 16:
+ return Inst<F64>(Opcode::ConvertF64U16, Flags{control}, value);
+ case 32:
+ return Inst<F64>(Opcode::ConvertF64U32, Flags{control}, value);
+ case 64:
+ return Inst<F64>(Opcode::ConvertF64U64, Flags{control}, value);
+ }
+ break;
+ }
+ throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize);
+}
+
+F16F32F64 IREmitter::ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed,
+ const Value& value, FpControl control) {
+ return is_signed ? ConvertSToF(dest_bitsize, src_bitsize, value, control)
+ : ConvertUToF(dest_bitsize, src_bitsize, value, control);
+}
+
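+// Converts between 32-bit and 64-bit unsigned integers; same-width values are passed through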
+U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) {
+ switch (result_bitsize) {
+ case 32:
+ switch (value.Type()) {
+ case Type::U32:
+ // Nothing to do
+ return value;
+ case Type::U64:
+ return Inst<U32>(Opcode::ConvertU32U64, value);
+ default:
+ break;
+ }
+ break;
+ case 64:
+ switch (value.Type()) {
+ case Type::U32:
+ return Inst<U64>(Opcode::ConvertU64U32, value);
+ case Type::U64:
+ // Nothing to do
+ return value;
+ default:
+ break;
+ }
+ }
+ throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
+}
+
+F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value, FpControl control) {
+ switch (result_bitsize) {
+ case 16:
+ switch (value.Type()) {
+ case Type::F16:
+ // Nothing to do
+ return value;
+ case Type::F32:
+ return Inst<F16>(Opcode::ConvertF16F32, Flags{control}, value);
+ case Type::F64:
+ throw LogicError("Illegal conversion from F64 to F16");
+ default:
+ break;
+ }
+ break;
+ case 32:
+ switch (value.Type()) {
+ case Type::F16:
+ return Inst<F32>(Opcode::ConvertF32F16, Flags{control}, value);
+ case Type::F32:
+ // Nothing to do
+ return value;
+ case Type::F64:
+ return Inst<F32>(Opcode::ConvertF32F64, Flags{control}, value);
+ default:
+ break;
+ }
+ break;
+ case 64:
+ switch (value.Type()) {
+ case Type::F16:
+ throw LogicError("Illegal conversion from F16 to F64");
+ case Type::F32:
+ return Inst<F64>(Opcode::ConvertF64F32, Flags{control}, value);
+ case Type::F64:
+ // Nothing to do
+ return value;
+ default:
+ break;
+ }
+ break;
+ }
+ throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
+}
+
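+// Immediate handles select the bound opcode variants; otherwise the bindless variants are used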
+Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias,
+ const Value& offset, const F32& lod_clamp,
+ TextureInstInfo info) {
+ const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)};
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleImplicitLod
+ : Opcode::BindlessImageSampleImplicitLod};
+ return Inst(op, Flags{info}, handle, coords, bias_lc, offset);
+}
+
+Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& coords, const F32& lod,
+ const Value& offset, TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleExplicitLod
+ : Opcode::BindlessImageSampleExplicitLod};
+ return Inst(op, Flags{info}, handle, coords, lod, offset);
+}
+
+F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, const F32& dref,
+ const F32& bias, const Value& offset,
+ const F32& lod_clamp, TextureInstInfo info) {
+ const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)};
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefImplicitLod
+ : Opcode::BindlessImageSampleDrefImplicitLod};
+ return Inst<F32>(op, Flags{info}, handle, coords, dref, bias_lc, offset);
+}
+
+F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, const F32& dref,
+ const F32& lod, const Value& offset,
+ TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefExplicitLod
+ : Opcode::BindlessImageSampleDrefExplicitLod};
+ return Inst<F32>(op, Flags{info}, handle, coords, dref, lod, offset);
+}
+
+Value IREmitter::ImageGather(const Value& handle, const Value& coords, const Value& offset,
+ const Value& offset2, TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGather : Opcode::BindlessImageGather};
+ return Inst(op, Flags{info}, handle, coords, offset, offset2);
+}
+
+Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const Value& offset,
+ const Value& offset2, const F32& dref, TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGatherDref
+ : Opcode::BindlessImageGatherDref};
+ return Inst(op, Flags{info}, handle, coords, offset, offset2, dref);
+}
+
+Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const Value& offset,
+ const U32& lod, const U32& multisampling, TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageFetch : Opcode::BindlessImageFetch};
+ return Inst(op, Flags{info}, handle, coords, offset, lod, multisampling);
+}
+
+Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageQueryDimensions
+ : Opcode::BindlessImageQueryDimensions};
+ return Inst(op, handle, lod);
+}
+
+Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords, TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageQueryLod
+ : Opcode::BindlessImageQueryLod};
+ return Inst(op, Flags{info}, handle, coords);
+}
+
+Value IREmitter::ImageGradient(const Value& handle, const Value& coords, const Value& derivates,
+ const Value& offset, const F32& lod_clamp, TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGradient
+ : Opcode::BindlessImageGradient};
+ return Inst(op, Flags{info}, handle, coords, derivates, offset, lod_clamp);
+}
+
+Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageRead : Opcode::BindlessImageRead};
+ return Inst(op, Flags{info}, handle, coords);
+}
+
+void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value& color,
+ TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageWrite : Opcode::BindlessImageWrite};
+ Inst(op, Flags{info}, handle, coords, color);
+}
+
+Value IREmitter::ImageAtomicIAdd(const Value& handle, const Value& coords, const Value& value,
+ TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicIAdd32
+ : Opcode::BindlessImageAtomicIAdd32};
+ return Inst(op, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicSMin(const Value& handle, const Value& coords, const Value& value,
+ TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicSMin32
+ : Opcode::BindlessImageAtomicSMin32};
+ return Inst(op, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicUMin(const Value& handle, const Value& coords, const Value& value,
+ TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicUMin32
+ : Opcode::BindlessImageAtomicUMin32};
+ return Inst(op, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicIMin(const Value& handle, const Value& coords, const Value& value,
+ bool is_signed, TextureInstInfo info) {
+ return is_signed ? ImageAtomicSMin(handle, coords, value, info)
+ : ImageAtomicUMin(handle, coords, value, info);
+}
+
+Value IREmitter::ImageAtomicSMax(const Value& handle, const Value& coords, const Value& value,
+ TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicSMax32
+ : Opcode::BindlessImageAtomicSMax32};
+ return Inst(op, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicUMax(const Value& handle, const Value& coords, const Value& value,
+ TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicUMax32
+ : Opcode::BindlessImageAtomicUMax32};
+ return Inst(op, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicIMax(const Value& handle, const Value& coords, const Value& value,
+ bool is_signed, TextureInstInfo info) {
+ return is_signed ? ImageAtomicSMax(handle, coords, value, info)
+ : ImageAtomicUMax(handle, coords, value, info);
+}
+
+Value IREmitter::ImageAtomicInc(const Value& handle, const Value& coords, const Value& value,
+ TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicInc32
+ : Opcode::BindlessImageAtomicInc32};
+ return Inst(op, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicDec(const Value& handle, const Value& coords, const Value& value,
+ TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicDec32
+ : Opcode::BindlessImageAtomicDec32};
+ return Inst(op, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value,
+ TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicAnd32
+ : Opcode::BindlessImageAtomicAnd32};
+ return Inst(op, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicOr(const Value& handle, const Value& coords, const Value& value,
+ TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicOr32
+ : Opcode::BindlessImageAtomicOr32};
+ return Inst(op, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicXor(const Value& handle, const Value& coords, const Value& value,
+ TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicXor32
+ : Opcode::BindlessImageAtomicXor32};
+ return Inst(op, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, const Value& value,
+ TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicExchange32
+ : Opcode::BindlessImageAtomicExchange32};
+ return Inst(op, Flags{info}, handle, coords, value);
+}
+
+U1 IREmitter::VoteAll(const U1& value) {
+ return Inst<U1>(Opcode::VoteAll, value);
+}
+
+U1 IREmitter::VoteAny(const U1& value) {
+ return Inst<U1>(Opcode::VoteAny, value);
+}
+
+U1 IREmitter::VoteEqual(const U1& value) {
+ return Inst<U1>(Opcode::VoteEqual, value);
+}
+
+U32 IREmitter::SubgroupBallot(const U1& value) {
+ return Inst<U32>(Opcode::SubgroupBallot, value);
+}
+
+U32 IREmitter::SubgroupEqMask() {
+ return Inst<U32>(Opcode::SubgroupEqMask);
+}
+
+U32 IREmitter::SubgroupLtMask() {
+ return Inst<U32>(Opcode::SubgroupLtMask);
+}
+
+U32 IREmitter::SubgroupLeMask() {
+ return Inst<U32>(Opcode::SubgroupLeMask);
+}
+
+U32 IREmitter::SubgroupGtMask() {
+ return Inst<U32>(Opcode::SubgroupGtMask);
+}
+
+U32 IREmitter::SubgroupGeMask() {
+ return Inst<U32>(Opcode::SubgroupGeMask);
+}
+
+U32 IREmitter::ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
+ const IR::U32& seg_mask) {
+ return Inst<U32>(Opcode::ShuffleIndex, value, index, clamp, seg_mask);
+}
+
+U32 IREmitter::ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
+ const IR::U32& seg_mask) {
+ return Inst<U32>(Opcode::ShuffleUp, value, index, clamp, seg_mask);
+}
+
+U32 IREmitter::ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
+ const IR::U32& seg_mask) {
+ return Inst<U32>(Opcode::ShuffleDown, value, index, clamp, seg_mask);
+}
+
+U32 IREmitter::ShuffleButterfly(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
+ const IR::U32& seg_mask) {
+ return Inst<U32>(Opcode::ShuffleButterfly, value, index, clamp, seg_mask);
+}
+
+F32 IREmitter::FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, FpControl control) {
+ return Inst<F32>(Opcode::FSwizzleAdd, Flags{control}, a, b, swizzle);
+}
+
+F32 IREmitter::DPdxFine(const F32& a) {
+ return Inst<F32>(Opcode::DPdxFine, a);
+}
+
+F32 IREmitter::DPdyFine(const F32& a) {
+ return Inst<F32>(Opcode::DPdyFine, a);
+}
+
+F32 IREmitter::DPdxCoarse(const F32& a) {
+ return Inst<F32>(Opcode::DPdxCoarse, a);
+}
+
+F32 IREmitter::DPdyCoarse(const F32& a) {
+ return Inst<F32>(Opcode::DPdyCoarse, a);
+}
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
new file mode 100644
index 000000000..53f7b3b06
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -0,0 +1,413 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstring>
+#include <type_traits>
+
+#include "shader_recompiler/frontend/ir/attribute.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::IR {
+
+class IREmitter {
+public:
+ explicit IREmitter(Block& block_) : block{&block_}, insertion_point{block->end()} {}
+ explicit IREmitter(Block& block_, Block::iterator insertion_point_)
+ : block{&block_}, insertion_point{insertion_point_} {}
+
+ Block* block;
+
+ [[nodiscard]] U1 Imm1(bool value) const;
+ [[nodiscard]] U8 Imm8(u8 value) const;
+ [[nodiscard]] U16 Imm16(u16 value) const;
+ [[nodiscard]] U32 Imm32(u32 value) const;
+ [[nodiscard]] U32 Imm32(s32 value) const;
+ [[nodiscard]] F32 Imm32(f32 value) const;
+ [[nodiscard]] U64 Imm64(u64 value) const;
+ [[nodiscard]] U64 Imm64(s64 value) const;
+ [[nodiscard]] F64 Imm64(f64 value) const;
+
+ U1 ConditionRef(const U1& value);
+ void Reference(const Value& value);
+
+ void PhiMove(IR::Inst& phi, const Value& value);
+
+ void Prologue();
+ void Epilogue();
+ void DemoteToHelperInvocation();
+ void EmitVertex(const U32& stream);
+ void EndPrimitive(const U32& stream);
+
+ [[nodiscard]] U32 GetReg(IR::Reg reg);
+ void SetReg(IR::Reg reg, const U32& value);
+
+ [[nodiscard]] U1 GetPred(IR::Pred pred, bool is_negated = false);
+ void SetPred(IR::Pred pred, const U1& value);
+
+ [[nodiscard]] U1 GetGotoVariable(u32 id);
+ void SetGotoVariable(u32 id, const U1& value);
+
+ [[nodiscard]] U32 GetIndirectBranchVariable();
+ void SetIndirectBranchVariable(const U32& value);
+
+ [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset);
+ [[nodiscard]] Value GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize,
+ bool is_signed);
+ [[nodiscard]] F32 GetFloatCbuf(const U32& binding, const U32& byte_offset);
+
+ [[nodiscard]] U1 GetZFlag();
+ [[nodiscard]] U1 GetSFlag();
+ [[nodiscard]] U1 GetCFlag();
+ [[nodiscard]] U1 GetOFlag();
+
+ void SetZFlag(const U1& value);
+ void SetSFlag(const U1& value);
+ void SetCFlag(const U1& value);
+ void SetOFlag(const U1& value);
+
+ [[nodiscard]] U1 Condition(IR::Condition cond);
+ [[nodiscard]] U1 GetFlowTestResult(FlowTest test);
+
+ [[nodiscard]] F32 GetAttribute(IR::Attribute attribute);
+ [[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex);
+ void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex);
+
+ [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address);
+ [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address, const U32& vertex);
+ void SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex);
+
+ [[nodiscard]] F32 GetPatch(Patch patch);
+ void SetPatch(Patch patch, const F32& value);
+
+ void SetFragColor(u32 index, u32 component, const F32& value);
+ void SetSampleMask(const U32& value);
+ void SetFragDepth(const F32& value);
+
+ [[nodiscard]] U32 WorkgroupIdX();
+ [[nodiscard]] U32 WorkgroupIdY();
+ [[nodiscard]] U32 WorkgroupIdZ();
+
+ [[nodiscard]] Value LocalInvocationId();
+ [[nodiscard]] U32 LocalInvocationIdX();
+ [[nodiscard]] U32 LocalInvocationIdY();
+ [[nodiscard]] U32 LocalInvocationIdZ();
+
+ [[nodiscard]] U32 InvocationId();
+ [[nodiscard]] U32 SampleId();
+ [[nodiscard]] U1 IsHelperInvocation();
+ [[nodiscard]] F32 YDirection();
+
+ [[nodiscard]] U32 LaneId();
+
+ [[nodiscard]] U32 LoadGlobalU8(const U64& address);
+ [[nodiscard]] U32 LoadGlobalS8(const U64& address);
+ [[nodiscard]] U32 LoadGlobalU16(const U64& address);
+ [[nodiscard]] U32 LoadGlobalS16(const U64& address);
+ [[nodiscard]] U32 LoadGlobal32(const U64& address);
+ [[nodiscard]] Value LoadGlobal64(const U64& address);
+ [[nodiscard]] Value LoadGlobal128(const U64& address);
+
+ void WriteGlobalU8(const U64& address, const U32& value);
+ void WriteGlobalS8(const U64& address, const U32& value);
+ void WriteGlobalU16(const U64& address, const U32& value);
+ void WriteGlobalS16(const U64& address, const U32& value);
+ void WriteGlobal32(const U64& address, const U32& value);
+ void WriteGlobal64(const U64& address, const IR::Value& vector);
+ void WriteGlobal128(const U64& address, const IR::Value& vector);
+
+ [[nodiscard]] U32 LoadLocal(const U32& word_offset);
+ void WriteLocal(const U32& word_offset, const U32& value);
+
+ [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
+ void WriteShared(int bit_size, const U32& offset, const Value& value);
+
+ [[nodiscard]] U1 GetZeroFromOp(const Value& op);
+ [[nodiscard]] U1 GetSignFromOp(const Value& op);
+ [[nodiscard]] U1 GetCarryFromOp(const Value& op);
+ [[nodiscard]] U1 GetOverflowFromOp(const Value& op);
+ [[nodiscard]] U1 GetSparseFromOp(const Value& op);
+ [[nodiscard]] U1 GetInBoundsFromOp(const Value& op);
+
+ [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2);
+ [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);
+ [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3,
+ const Value& e4);
+ [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element);
+ [[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element);
+
+ [[nodiscard]] Value Select(const U1& condition, const Value& true_value,
+ const Value& false_value);
+
+ void Barrier();
+ void WorkgroupMemoryBarrier();
+ void DeviceMemoryBarrier();
+
+ template <typename Dest, typename Source>
+ [[nodiscard]] Dest BitCast(const Source& value);
+
+ [[nodiscard]] U64 PackUint2x32(const Value& vector);
+ [[nodiscard]] Value UnpackUint2x32(const U64& value);
+
+ [[nodiscard]] U32 PackFloat2x16(const Value& vector);
+ [[nodiscard]] Value UnpackFloat2x16(const U32& value);
+
+ [[nodiscard]] U32 PackHalf2x16(const Value& vector);
+ [[nodiscard]] Value UnpackHalf2x16(const U32& value);
+
+ [[nodiscard]] F64 PackDouble2x32(const Value& vector);
+ [[nodiscard]] Value UnpackDouble2x32(const F64& value);
+
+ [[nodiscard]] F16F32F64 FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control = {});
+ [[nodiscard]] F16F32F64 FPMul(const F16F32F64& a, const F16F32F64& b, FpControl control = {});
+ [[nodiscard]] F16F32F64 FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F64& c,
+ FpControl control = {});
+
+ [[nodiscard]] F16F32F64 FPAbs(const F16F32F64& value);
+ [[nodiscard]] F16F32F64 FPNeg(const F16F32F64& value);
+ [[nodiscard]] F16F32F64 FPAbsNeg(const F16F32F64& value, bool abs, bool neg);
+
+ [[nodiscard]] F32 FPCos(const F32& value);
+ [[nodiscard]] F32 FPSin(const F32& value);
+ [[nodiscard]] F32 FPExp2(const F32& value);
+ [[nodiscard]] F32 FPLog2(const F32& value);
+ [[nodiscard]] F32F64 FPRecip(const F32F64& value);
+ [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value);
+ [[nodiscard]] F32 FPSqrt(const F32& value);
+ [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value);
+ [[nodiscard]] F16F32F64 FPClamp(const F16F32F64& value, const F16F32F64& min_value,
+ const F16F32F64& max_value);
+ [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {});
+ [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {});
+ [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {});
+ [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value, FpControl control = {});
+
+ [[nodiscard]] U1 FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {},
+ bool ordered = true);
+ [[nodiscard]] U1 FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {},
+ bool ordered = true);
+ [[nodiscard]] U1 FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {},
+ bool ordered = true);
+ [[nodiscard]] U1 FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs,
+ FpControl control = {}, bool ordered = true);
+ [[nodiscard]] U1 FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs,
+ FpControl control = {}, bool ordered = true);
+ [[nodiscard]] U1 FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs,
+ FpControl control = {}, bool ordered = true);
+ [[nodiscard]] U1 FPIsNan(const F16F32F64& value);
+ [[nodiscard]] U1 FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs);
+ [[nodiscard]] U1 FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs);
+ [[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control = {});
+ [[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control = {});
+
+ [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
+ [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
+ [[nodiscard]] U32 IMul(const U32& a, const U32& b);
+ [[nodiscard]] U32U64 INeg(const U32U64& value);
+ [[nodiscard]] U32 IAbs(const U32& value);
+ [[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift);
+ [[nodiscard]] U32U64 ShiftRightLogical(const U32U64& base, const U32& shift);
+ [[nodiscard]] U32U64 ShiftRightArithmetic(const U32U64& base, const U32& shift);
+ [[nodiscard]] U32 BitwiseAnd(const U32& a, const U32& b);
+ [[nodiscard]] U32 BitwiseOr(const U32& a, const U32& b);
+ [[nodiscard]] U32 BitwiseXor(const U32& a, const U32& b);
+ [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset,
+ const U32& count);
+ [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count,
+ bool is_signed = false);
+ [[nodiscard]] U32 BitReverse(const U32& value);
+ [[nodiscard]] U32 BitCount(const U32& value);
+ [[nodiscard]] U32 BitwiseNot(const U32& value);
+
+ [[nodiscard]] U32 FindSMsb(const U32& value);
+ [[nodiscard]] U32 FindUMsb(const U32& value);
+ [[nodiscard]] U32 SMin(const U32& a, const U32& b);
+ [[nodiscard]] U32 UMin(const U32& a, const U32& b);
+ [[nodiscard]] U32 IMin(const U32& a, const U32& b, bool is_signed);
+ [[nodiscard]] U32 SMax(const U32& a, const U32& b);
+ [[nodiscard]] U32 UMax(const U32& a, const U32& b);
+ [[nodiscard]] U32 IMax(const U32& a, const U32& b, bool is_signed);
+ [[nodiscard]] U32 SClamp(const U32& value, const U32& min, const U32& max);
+ [[nodiscard]] U32 UClamp(const U32& value, const U32& min, const U32& max);
+
+ [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed);
+ [[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs);
+ [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
+ [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed);
+ [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs);
+ [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
+
+ [[nodiscard]] U32 SharedAtomicIAdd(const U32& pointer_offset, const U32& value);
+ [[nodiscard]] U32 SharedAtomicSMin(const U32& pointer_offset, const U32& value);
+ [[nodiscard]] U32 SharedAtomicUMin(const U32& pointer_offset, const U32& value);
+ [[nodiscard]] U32 SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed);
+ [[nodiscard]] U32 SharedAtomicSMax(const U32& pointer_offset, const U32& value);
+ [[nodiscard]] U32 SharedAtomicUMax(const U32& pointer_offset, const U32& value);
+ [[nodiscard]] U32 SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed);
+ [[nodiscard]] U32 SharedAtomicInc(const U32& pointer_offset, const U32& value);
+ [[nodiscard]] U32 SharedAtomicDec(const U32& pointer_offset, const U32& value);
+ [[nodiscard]] U32 SharedAtomicAnd(const U32& pointer_offset, const U32& value);
+ [[nodiscard]] U32 SharedAtomicOr(const U32& pointer_offset, const U32& value);
+ [[nodiscard]] U32 SharedAtomicXor(const U32& pointer_offset, const U32& value);
+ [[nodiscard]] U32U64 SharedAtomicExchange(const U32& pointer_offset, const U32U64& value);
+
+ [[nodiscard]] U32U64 GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value);
+ [[nodiscard]] U32U64 GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value);
+ [[nodiscard]] U32U64 GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value);
+ [[nodiscard]] U32U64 GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value,
+ bool is_signed);
+ [[nodiscard]] U32U64 GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value);
+ [[nodiscard]] U32U64 GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value);
+ [[nodiscard]] U32U64 GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value,
+ bool is_signed);
+ [[nodiscard]] U32 GlobalAtomicInc(const U64& pointer_offset, const U32& value);
+ [[nodiscard]] U32 GlobalAtomicDec(const U64& pointer_offset, const U32& value);
+ [[nodiscard]] U32U64 GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value);
+ [[nodiscard]] U32U64 GlobalAtomicOr(const U64& pointer_offset, const U32U64& value);
+ [[nodiscard]] U32U64 GlobalAtomicXor(const U64& pointer_offset, const U32U64& value);
+ [[nodiscard]] U32U64 GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value);
+
+ [[nodiscard]] F32 GlobalAtomicF32Add(const U64& pointer_offset, const Value& value,
+ const FpControl control = {});
+ [[nodiscard]] Value GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value,
+ const FpControl control = {});
+ [[nodiscard]] Value GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value,
+ const FpControl control = {});
+ [[nodiscard]] Value GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value,
+ const FpControl control = {});
+
+ [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b);
+ [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b);
+ [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b);
+ [[nodiscard]] U1 LogicalNot(const U1& value);
+
+ [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value);
+ [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value);
+ [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value);
+ [[nodiscard]] F16F32F64 ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value,
+ FpControl control = {});
+ [[nodiscard]] F16F32F64 ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value,
+ FpControl control = {});
+ [[nodiscard]] F16F32F64 ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed,
+ const Value& value, FpControl control = {});
+
+ [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value);
+ [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value,
+ FpControl control = {});
+
+ [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords,
+ const F32& bias, const Value& offset,
+ const F32& lod_clamp, TextureInstInfo info);
+ [[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& coords,
+ const F32& lod, const Value& offset,
+ TextureInstInfo info);
+ [[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& coords,
+ const F32& dref, const F32& bias,
+ const Value& offset, const F32& lod_clamp,
+ TextureInstInfo info);
+ [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords,
+ const F32& dref, const F32& lod,
+ const Value& offset, TextureInstInfo info);
+ [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod);
+
+ [[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords,
+ TextureInstInfo info);
+ [[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset,
+ const Value& offset2, TextureInstInfo info);
+ [[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords,
+ const Value& offset, const Value& offset2, const F32& dref,
+ TextureInstInfo info);
+ [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset,
+ const U32& lod, const U32& multisampling, TextureInstInfo info);
+ [[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords,
+ const Value& derivates, const Value& offset,
+ const F32& lod_clamp, TextureInstInfo info);
+ [[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, TextureInstInfo info);
+    void ImageWrite(const Value& handle, const Value& coords, const Value& color,
+                    TextureInstInfo info);
+
+ [[nodiscard]] Value ImageAtomicIAdd(const Value& handle, const Value& coords,
+ const Value& value, TextureInstInfo info);
+ [[nodiscard]] Value ImageAtomicSMin(const Value& handle, const Value& coords,
+ const Value& value, TextureInstInfo info);
+ [[nodiscard]] Value ImageAtomicUMin(const Value& handle, const Value& coords,
+ const Value& value, TextureInstInfo info);
+ [[nodiscard]] Value ImageAtomicIMin(const Value& handle, const Value& coords,
+ const Value& value, bool is_signed, TextureInstInfo info);
+ [[nodiscard]] Value ImageAtomicSMax(const Value& handle, const Value& coords,
+ const Value& value, TextureInstInfo info);
+ [[nodiscard]] Value ImageAtomicUMax(const Value& handle, const Value& coords,
+ const Value& value, TextureInstInfo info);
+ [[nodiscard]] Value ImageAtomicIMax(const Value& handle, const Value& coords,
+ const Value& value, bool is_signed, TextureInstInfo info);
+ [[nodiscard]] Value ImageAtomicInc(const Value& handle, const Value& coords, const Value& value,
+ TextureInstInfo info);
+ [[nodiscard]] Value ImageAtomicDec(const Value& handle, const Value& coords, const Value& value,
+ TextureInstInfo info);
+ [[nodiscard]] Value ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value,
+ TextureInstInfo info);
+ [[nodiscard]] Value ImageAtomicOr(const Value& handle, const Value& coords, const Value& value,
+ TextureInstInfo info);
+ [[nodiscard]] Value ImageAtomicXor(const Value& handle, const Value& coords, const Value& value,
+ TextureInstInfo info);
+ [[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords,
+ const Value& value, TextureInstInfo info);
+ [[nodiscard]] U1 VoteAll(const U1& value);
+ [[nodiscard]] U1 VoteAny(const U1& value);
+ [[nodiscard]] U1 VoteEqual(const U1& value);
+ [[nodiscard]] U32 SubgroupBallot(const U1& value);
+ [[nodiscard]] U32 SubgroupEqMask();
+ [[nodiscard]] U32 SubgroupLtMask();
+ [[nodiscard]] U32 SubgroupLeMask();
+ [[nodiscard]] U32 SubgroupGtMask();
+ [[nodiscard]] U32 SubgroupGeMask();
+ [[nodiscard]] U32 ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
+ const IR::U32& seg_mask);
+ [[nodiscard]] U32 ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
+ const IR::U32& seg_mask);
+ [[nodiscard]] U32 ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
+ const IR::U32& seg_mask);
+ [[nodiscard]] U32 ShuffleButterfly(const IR::U32& value, const IR::U32& index,
+ const IR::U32& clamp, const IR::U32& seg_mask);
+ [[nodiscard]] F32 FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle,
+ FpControl control = {});
+
+ [[nodiscard]] F32 DPdxFine(const F32& a);
+
+ [[nodiscard]] F32 DPdyFine(const F32& a);
+
+ [[nodiscard]] F32 DPdxCoarse(const F32& a);
+
+ [[nodiscard]] F32 DPdyCoarse(const F32& a);
+
+private:
+ IR::Block::iterator insertion_point;
+
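+    // Emits a new instruction at the insertion point and returns it as the requested type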
+ template <typename T = Value, typename... Args>
+ T Inst(Opcode op, Args... args) {
+ auto it{block->PrependNewInst(insertion_point, op, {Value{args}...})};
+ return T{Value{&*it}};
+ }
+
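+    // Wraps a trivially copyable type to be packed into an instruction's 32-bit flags field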
+ template <typename T>
+ requires(sizeof(T) <= sizeof(u32) && std::is_trivially_copyable_v<T>) struct Flags {
+ Flags() = default;
+ Flags(T proxy_) : proxy{proxy_} {}
+
+ T proxy;
+ };
+
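+    // Emits an instruction whose flags are copied from the given proxy object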
+ template <typename T = Value, typename FlagType, typename... Args>
+ T Inst(Opcode op, Flags<FlagType> flags, Args... args) {
+ u32 raw_flags{};
+ std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy));
+ auto it{block->PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)};
+ return T{Value{&*it}};
+ }
+};
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
new file mode 100644
index 000000000..3dfa5a880
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -0,0 +1,411 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <memory>
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/type.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::IR {
+namespace {
+void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) {
+ if (inst && inst->GetOpcode() != opcode) {
+ throw LogicError("Invalid pseudo-instruction");
+ }
+}
+
+void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) {
+ if (dest_inst) {
+ throw LogicError("Only one of each type of pseudo-op allowed");
+ }
+ dest_inst = pseudo_inst;
+}
+
+void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) {
+ if (inst->GetOpcode() != expected_opcode) {
+ throw LogicError("Undoing use of invalid pseudo-op");
+ }
+ inst = nullptr;
+}
+
+void AllocAssociatedInsts(std::unique_ptr<AssociatedInsts>& associated_insts) {
+ if (!associated_insts) {
+ associated_insts = std::make_unique<AssociatedInsts>();
+ }
+}
+} // Anonymous namespace
+
+Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} {
+ if (op == Opcode::Phi) {
+ std::construct_at(&phi_args);
+ } else {
+ std::construct_at(&args);
+ }
+}
+
+Inst::~Inst() {
+ if (op == Opcode::Phi) {
+ std::destroy_at(&phi_args);
+ } else {
+ std::destroy_at(&args);
+ }
+}
+
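+// Instructions with side effects must be preserved even if their result is unused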
+bool Inst::MayHaveSideEffects() const noexcept {
+ switch (op) {
+ case Opcode::ConditionRef:
+ case Opcode::Reference:
+ case Opcode::PhiMove:
+ case Opcode::Prologue:
+ case Opcode::Epilogue:
+ case Opcode::Join:
+ case Opcode::DemoteToHelperInvocation:
+ case Opcode::Barrier:
+ case Opcode::WorkgroupMemoryBarrier:
+ case Opcode::DeviceMemoryBarrier:
+ case Opcode::EmitVertex:
+ case Opcode::EndPrimitive:
+ case Opcode::SetAttribute:
+ case Opcode::SetAttributeIndexed:
+ case Opcode::SetPatch:
+ case Opcode::SetFragColor:
+ case Opcode::SetSampleMask:
+ case Opcode::SetFragDepth:
+ case Opcode::WriteGlobalU8:
+ case Opcode::WriteGlobalS8:
+ case Opcode::WriteGlobalU16:
+ case Opcode::WriteGlobalS16:
+ case Opcode::WriteGlobal32:
+ case Opcode::WriteGlobal64:
+ case Opcode::WriteGlobal128:
+ case Opcode::WriteStorageU8:
+ case Opcode::WriteStorageS8:
+ case Opcode::WriteStorageU16:
+ case Opcode::WriteStorageS16:
+ case Opcode::WriteStorage32:
+ case Opcode::WriteStorage64:
+ case Opcode::WriteStorage128:
+ case Opcode::WriteLocal:
+ case Opcode::WriteSharedU8:
+ case Opcode::WriteSharedU16:
+ case Opcode::WriteSharedU32:
+ case Opcode::WriteSharedU64:
+ case Opcode::WriteSharedU128:
+ case Opcode::SharedAtomicIAdd32:
+ case Opcode::SharedAtomicSMin32:
+ case Opcode::SharedAtomicUMin32:
+ case Opcode::SharedAtomicSMax32:
+ case Opcode::SharedAtomicUMax32:
+ case Opcode::SharedAtomicInc32:
+ case Opcode::SharedAtomicDec32:
+ case Opcode::SharedAtomicAnd32:
+ case Opcode::SharedAtomicOr32:
+ case Opcode::SharedAtomicXor32:
+ case Opcode::SharedAtomicExchange32:
+ case Opcode::SharedAtomicExchange64:
+ case Opcode::GlobalAtomicIAdd32:
+ case Opcode::GlobalAtomicSMin32:
+ case Opcode::GlobalAtomicUMin32:
+ case Opcode::GlobalAtomicSMax32:
+ case Opcode::GlobalAtomicUMax32:
+ case Opcode::GlobalAtomicInc32:
+ case Opcode::GlobalAtomicDec32:
+ case Opcode::GlobalAtomicAnd32:
+ case Opcode::GlobalAtomicOr32:
+ case Opcode::GlobalAtomicXor32:
+ case Opcode::GlobalAtomicExchange32:
+ case Opcode::GlobalAtomicIAdd64:
+ case Opcode::GlobalAtomicSMin64:
+ case Opcode::GlobalAtomicUMin64:
+ case Opcode::GlobalAtomicSMax64:
+ case Opcode::GlobalAtomicUMax64:
+ case Opcode::GlobalAtomicAnd64:
+ case Opcode::GlobalAtomicOr64:
+ case Opcode::GlobalAtomicXor64:
+ case Opcode::GlobalAtomicExchange64:
+ case Opcode::GlobalAtomicAddF32:
+ case Opcode::GlobalAtomicAddF16x2:
+ case Opcode::GlobalAtomicAddF32x2:
+ case Opcode::GlobalAtomicMinF16x2:
+ case Opcode::GlobalAtomicMinF32x2:
+ case Opcode::GlobalAtomicMaxF16x2:
+ case Opcode::GlobalAtomicMaxF32x2:
+ case Opcode::StorageAtomicIAdd32:
+ case Opcode::StorageAtomicSMin32:
+ case Opcode::StorageAtomicUMin32:
+ case Opcode::StorageAtomicSMax32:
+ case Opcode::StorageAtomicUMax32:
+ case Opcode::StorageAtomicInc32:
+ case Opcode::StorageAtomicDec32:
+ case Opcode::StorageAtomicAnd32:
+ case Opcode::StorageAtomicOr32:
+ case Opcode::StorageAtomicXor32:
+ case Opcode::StorageAtomicExchange32:
+ case Opcode::StorageAtomicIAdd64:
+ case Opcode::StorageAtomicSMin64:
+ case Opcode::StorageAtomicUMin64:
+ case Opcode::StorageAtomicSMax64:
+ case Opcode::StorageAtomicUMax64:
+ case Opcode::StorageAtomicAnd64:
+ case Opcode::StorageAtomicOr64:
+ case Opcode::StorageAtomicXor64:
+ case Opcode::StorageAtomicExchange64:
+ case Opcode::StorageAtomicAddF32:
+ case Opcode::StorageAtomicAddF16x2:
+ case Opcode::StorageAtomicAddF32x2:
+ case Opcode::StorageAtomicMinF16x2:
+ case Opcode::StorageAtomicMinF32x2:
+ case Opcode::StorageAtomicMaxF16x2:
+ case Opcode::StorageAtomicMaxF32x2:
+ case Opcode::BindlessImageWrite:
+ case Opcode::BoundImageWrite:
+ case Opcode::ImageWrite:
+ case IR::Opcode::BindlessImageAtomicIAdd32:
+ case IR::Opcode::BindlessImageAtomicSMin32:
+ case IR::Opcode::BindlessImageAtomicUMin32:
+ case IR::Opcode::BindlessImageAtomicSMax32:
+ case IR::Opcode::BindlessImageAtomicUMax32:
+ case IR::Opcode::BindlessImageAtomicInc32:
+ case IR::Opcode::BindlessImageAtomicDec32:
+ case IR::Opcode::BindlessImageAtomicAnd32:
+ case IR::Opcode::BindlessImageAtomicOr32:
+ case IR::Opcode::BindlessImageAtomicXor32:
+ case IR::Opcode::BindlessImageAtomicExchange32:
+ case IR::Opcode::BoundImageAtomicIAdd32:
+ case IR::Opcode::BoundImageAtomicSMin32:
+ case IR::Opcode::BoundImageAtomicUMin32:
+ case IR::Opcode::BoundImageAtomicSMax32:
+ case IR::Opcode::BoundImageAtomicUMax32:
+ case IR::Opcode::BoundImageAtomicInc32:
+ case IR::Opcode::BoundImageAtomicDec32:
+ case IR::Opcode::BoundImageAtomicAnd32:
+ case IR::Opcode::BoundImageAtomicOr32:
+ case IR::Opcode::BoundImageAtomicXor32:
+ case IR::Opcode::BoundImageAtomicExchange32:
+ case IR::Opcode::ImageAtomicIAdd32:
+ case IR::Opcode::ImageAtomicSMin32:
+ case IR::Opcode::ImageAtomicUMin32:
+ case IR::Opcode::ImageAtomicSMax32:
+ case IR::Opcode::ImageAtomicUMax32:
+ case IR::Opcode::ImageAtomicInc32:
+ case IR::Opcode::ImageAtomicDec32:
+ case IR::Opcode::ImageAtomicAnd32:
+ case IR::Opcode::ImageAtomicOr32:
+ case IR::Opcode::ImageAtomicXor32:
+ case IR::Opcode::ImageAtomicExchange32:
+ return true;
+ default:
+ return false;
+ }
+}
+
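+// Pseudo-instructions read status flags (zero, sign, carry, ...) of another instruction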
+bool Inst::IsPseudoInstruction() const noexcept {
+ switch (op) {
+ case Opcode::GetZeroFromOp:
+ case Opcode::GetSignFromOp:
+ case Opcode::GetCarryFromOp:
+ case Opcode::GetOverflowFromOp:
+ case Opcode::GetSparseFromOp:
+ case Opcode::GetInBoundsFromOp:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool Inst::AreAllArgsImmediates() const {
+ if (op == Opcode::Phi) {
+ throw LogicError("Testing for all arguments are immediates on phi instruction");
+ }
+ return std::all_of(args.begin(), args.begin() + NumArgs(),
+ [](const IR::Value& value) { return value.IsImmediate(); });
+}
+
+Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) {
+ if (!associated_insts) {
+ return nullptr;
+ }
+ switch (opcode) {
+ case Opcode::GetZeroFromOp:
+ CheckPseudoInstruction(associated_insts->zero_inst, Opcode::GetZeroFromOp);
+ return associated_insts->zero_inst;
+ case Opcode::GetSignFromOp:
+ CheckPseudoInstruction(associated_insts->sign_inst, Opcode::GetSignFromOp);
+ return associated_insts->sign_inst;
+ case Opcode::GetCarryFromOp:
+ CheckPseudoInstruction(associated_insts->carry_inst, Opcode::GetCarryFromOp);
+ return associated_insts->carry_inst;
+ case Opcode::GetOverflowFromOp:
+ CheckPseudoInstruction(associated_insts->overflow_inst, Opcode::GetOverflowFromOp);
+ return associated_insts->overflow_inst;
+ case Opcode::GetSparseFromOp:
+ CheckPseudoInstruction(associated_insts->sparse_inst, Opcode::GetSparseFromOp);
+ return associated_insts->sparse_inst;
+ case Opcode::GetInBoundsFromOp:
+ CheckPseudoInstruction(associated_insts->in_bounds_inst, Opcode::GetInBoundsFromOp);
+ return associated_insts->in_bounds_inst;
+ default:
+ throw InvalidArgument("{} is not a pseudo-instruction", opcode);
+ }
+}
+
+IR::Type Inst::Type() const {
+ return TypeOf(op);
+}
+
+void Inst::SetArg(size_t index, Value value) {
+ if (index >= NumArgs()) {
+ throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op);
+ }
+ const IR::Value arg{Arg(index)};
+ if (!arg.IsImmediate()) {
+ UndoUse(arg);
+ }
+ if (!value.IsImmediate()) {
+ Use(value);
+ }
+ if (op == Opcode::Phi) {
+ phi_args[index].second = value;
+ } else {
+ args[index] = value;
+ }
+}
+
+Block* Inst::PhiBlock(size_t index) const {
+ if (op != Opcode::Phi) {
+ throw LogicError("{} is not a Phi instruction", op);
+ }
+ if (index >= phi_args.size()) {
+        throw InvalidArgument("Out of bounds argument index {} in phi instruction", index);
+ }
+ return phi_args[index].first;
+}
+
+void Inst::AddPhiOperand(Block* predecessor, const Value& value) {
+ if (!value.IsImmediate()) {
+ Use(value);
+ }
+ phi_args.emplace_back(predecessor, value);
+}
+
+void Inst::Invalidate() {
+ ClearArgs();
+ ReplaceOpcode(Opcode::Void);
+}
+
+void Inst::ClearArgs() {
+ if (op == Opcode::Phi) {
+ for (auto& pair : phi_args) {
+ IR::Value& value{pair.second};
+ if (!value.IsImmediate()) {
+ UndoUse(value);
+ }
+ }
+ phi_args.clear();
+ } else {
+ for (auto& value : args) {
+ if (!value.IsImmediate()) {
+ UndoUse(value);
+ }
+ }
+ // Reset arguments to null
+        // std::memset was measured to be faster on MSVC than std::ranges::fill
+ std::memset(reinterpret_cast<char*>(&args), 0, sizeof(args));
+ }
+}
+
+void Inst::ReplaceUsesWith(Value replacement) {
+ Invalidate();
+ ReplaceOpcode(Opcode::Identity);
+ if (!replacement.IsImmediate()) {
+ Use(replacement);
+ }
+ args[0] = replacement;
+}
+
+void Inst::ReplaceOpcode(IR::Opcode opcode) {
+ if (opcode == IR::Opcode::Phi) {
+ throw LogicError("Cannot transition into Phi");
+ }
+ if (op == Opcode::Phi) {
+ // Transition out of phi arguments into non-phi
+ std::destroy_at(&phi_args);
+ std::construct_at(&args);
+ }
+ op = opcode;
+}
+
+void Inst::Use(const Value& value) {
+ Inst* const inst{value.Inst()};
+ ++inst->use_count;
+
+ std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts};
+ switch (op) {
+ case Opcode::GetZeroFromOp:
+ AllocAssociatedInsts(assoc_inst);
+ SetPseudoInstruction(assoc_inst->zero_inst, this);
+ break;
+ case Opcode::GetSignFromOp:
+ AllocAssociatedInsts(assoc_inst);
+ SetPseudoInstruction(assoc_inst->sign_inst, this);
+ break;
+ case Opcode::GetCarryFromOp:
+ AllocAssociatedInsts(assoc_inst);
+ SetPseudoInstruction(assoc_inst->carry_inst, this);
+ break;
+ case Opcode::GetOverflowFromOp:
+ AllocAssociatedInsts(assoc_inst);
+ SetPseudoInstruction(assoc_inst->overflow_inst, this);
+ break;
+ case Opcode::GetSparseFromOp:
+ AllocAssociatedInsts(assoc_inst);
+ SetPseudoInstruction(assoc_inst->sparse_inst, this);
+ break;
+ case Opcode::GetInBoundsFromOp:
+ AllocAssociatedInsts(assoc_inst);
+ SetPseudoInstruction(assoc_inst->in_bounds_inst, this);
+ break;
+ default:
+ break;
+ }
+}
+
+void Inst::UndoUse(const Value& value) {
+ Inst* const inst{value.Inst()};
+ --inst->use_count;
+
+ std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts};
+ switch (op) {
+ case Opcode::GetZeroFromOp:
+ AllocAssociatedInsts(assoc_inst);
+ RemovePseudoInstruction(assoc_inst->zero_inst, Opcode::GetZeroFromOp);
+ break;
+ case Opcode::GetSignFromOp:
+ AllocAssociatedInsts(assoc_inst);
+ RemovePseudoInstruction(assoc_inst->sign_inst, Opcode::GetSignFromOp);
+ break;
+ case Opcode::GetCarryFromOp:
+ AllocAssociatedInsts(assoc_inst);
+ RemovePseudoInstruction(assoc_inst->carry_inst, Opcode::GetCarryFromOp);
+ break;
+ case Opcode::GetOverflowFromOp:
+ AllocAssociatedInsts(assoc_inst);
+ RemovePseudoInstruction(assoc_inst->overflow_inst, Opcode::GetOverflowFromOp);
+ break;
+ case Opcode::GetSparseFromOp:
+ AllocAssociatedInsts(assoc_inst);
+ RemovePseudoInstruction(assoc_inst->sparse_inst, Opcode::GetSparseFromOp);
+ break;
+ case Opcode::GetInBoundsFromOp:
+ AllocAssociatedInsts(assoc_inst);
+ RemovePseudoInstruction(assoc_inst->in_bounds_inst, Opcode::GetInBoundsFromOp);
+ break;
+ default:
+ break;
+ }
+}
+
+} // namespace Shader::IR
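The Use()/UndoUse() bookkeeping above is what makes GetAssociatedPseudoOperation() a constant-time lookup. A minimal sketch of how a consumer might query it, assuming value.h is available as introduced later in this patch; the helper name CarryObserverOf is illustrative and not part of the change:

#include "shader_recompiler/frontend/ir/value.h"

// Return the GetCarryFromOp pseudo-instruction observing `add`, or nullptr if the
// carry flag of this addition is never read.
Shader::IR::Inst* CarryObserverOf(Shader::IR::Inst& add) {
    if (!add.HasAssociatedPseudoOperation()) {
        return nullptr;
    }
    return add.GetAssociatedPseudoOperation(Shader::IR::Opcode::GetCarryFromOp);
}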
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h
new file mode 100644
index 000000000..77cda1f8a
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/modifiers.h
@@ -0,0 +1,49 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/shader_info.h"
+
+namespace Shader::IR {
+
+enum class FmzMode : u8 {
+ DontCare, // Not specified for this instruction
+ FTZ, // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK)
+ FMZ, // Flush denorms to zero, x * 0 == 0 (D3D9)
+ None, // Denorms are not flushed, NAN is propagated (nouveau)
+};
+
+enum class FpRounding : u8 {
+ DontCare, // Not specified for this instruction
+ RN, // Round to nearest even,
+ RM, // Round towards negative infinity
+ RP, // Round towards positive infinity
+ RZ, // Round towards zero
+};
+
+struct FpControl {
+ bool no_contraction{false};
+ FpRounding rounding{FpRounding::DontCare};
+ FmzMode fmz_mode{FmzMode::DontCare};
+};
+static_assert(sizeof(FpControl) <= sizeof(u32));
+
+union TextureInstInfo {
+ u32 raw;
+ BitField<0, 16, u32> descriptor_index;
+ BitField<16, 3, TextureType> type;
+ BitField<19, 1, u32> is_depth;
+ BitField<20, 1, u32> has_bias;
+ BitField<21, 1, u32> has_lod_clamp;
+ BitField<22, 1, u32> relaxed_precision;
+ BitField<23, 2, u32> gather_component;
+ BitField<25, 2, u32> num_derivates;
+ BitField<27, 3, ImageFormat> image_format;
+};
+static_assert(sizeof(TextureInstInfo) <= sizeof(u32));
+
+} // namespace Shader::IR
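TextureInstInfo is sized to fit the 32-bit flags word carried by each instruction. A hedged sketch of packing it, assuming Common::BitField exposes Assign() as it does elsewhere in yuzu; MakeSampleInfo is an illustrative helper, not part of the patch:

#include "shader_recompiler/frontend/ir/modifiers.h"

// Pack texture metadata for a hypothetical sample instruction; the packed union
// fits in sizeof(u32), so it can be stored through Inst::SetFlags() and read back
// with Inst::Flags<TextureInstInfo>().
Shader::IR::TextureInstInfo MakeSampleInfo(u32 descriptor_index, bool has_bias) {
    Shader::IR::TextureInstInfo info{};
    info.descriptor_index.Assign(descriptor_index);
    info.has_bias.Assign(has_bias ? 1 : 0);
    return info;
}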
diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp
new file mode 100644
index 000000000..24d024ad7
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/opcodes.cpp
@@ -0,0 +1,15 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/frontend/ir/opcodes.h"
+
+namespace Shader::IR {
+
+std::string_view NameOf(Opcode op) {
+ return Detail::META_TABLE[static_cast<size_t>(op)].name;
+}
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/opcodes.h b/src/shader_recompiler/frontend/ir/opcodes.h
new file mode 100644
index 000000000..9ab108292
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/opcodes.h
@@ -0,0 +1,110 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <array>
+#include <string_view>
+
+#include <fmt/format.h>
+
+#include "shader_recompiler/frontend/ir/type.h"
+
+namespace Shader::IR {
+
+enum class Opcode {
+#define OPCODE(name, ...) name,
+#include "opcodes.inc"
+#undef OPCODE
+};
+
+namespace Detail {
+struct OpcodeMeta {
+ std::string_view name;
+ Type type;
+ std::array<Type, 5> arg_types;
+};
+
+// using enum Type; (unsupported on some target compilers, so each enumerator is aliased instead)
+constexpr Type Void{Type::Void};
+constexpr Type Opaque{Type::Opaque};
+constexpr Type Reg{Type::Reg};
+constexpr Type Pred{Type::Pred};
+constexpr Type Attribute{Type::Attribute};
+constexpr Type Patch{Type::Patch};
+constexpr Type U1{Type::U1};
+constexpr Type U8{Type::U8};
+constexpr Type U16{Type::U16};
+constexpr Type U32{Type::U32};
+constexpr Type U64{Type::U64};
+constexpr Type F16{Type::F16};
+constexpr Type F32{Type::F32};
+constexpr Type F64{Type::F64};
+constexpr Type U32x2{Type::U32x2};
+constexpr Type U32x3{Type::U32x3};
+constexpr Type U32x4{Type::U32x4};
+constexpr Type F16x2{Type::F16x2};
+constexpr Type F16x3{Type::F16x3};
+constexpr Type F16x4{Type::F16x4};
+constexpr Type F32x2{Type::F32x2};
+constexpr Type F32x3{Type::F32x3};
+constexpr Type F32x4{Type::F32x4};
+constexpr Type F64x2{Type::F64x2};
+constexpr Type F64x3{Type::F64x3};
+constexpr Type F64x4{Type::F64x4};
+
+constexpr OpcodeMeta META_TABLE[]{
+#define OPCODE(name_token, type_token, ...) \
+ { \
+ .name{#name_token}, \
+ .type = type_token, \
+ .arg_types{__VA_ARGS__}, \
+ },
+#include "opcodes.inc"
+#undef OPCODE
+};
+constexpr size_t CalculateNumArgsOf(Opcode op) {
+ const auto& arg_types{META_TABLE[static_cast<size_t>(op)].arg_types};
+ return static_cast<size_t>(
+ std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void)));
+}
+
+constexpr u8 NUM_ARGS[]{
+#define OPCODE(name_token, type_token, ...) static_cast<u8>(CalculateNumArgsOf(Opcode::name_token)),
+#include "opcodes.inc"
+#undef OPCODE
+};
+} // namespace Detail
+
+/// Get return type of an opcode
+[[nodiscard]] inline Type TypeOf(Opcode op) noexcept {
+ return Detail::META_TABLE[static_cast<size_t>(op)].type;
+}
+
+/// Get the number of arguments an opcode accepts
+[[nodiscard]] inline size_t NumArgsOf(Opcode op) noexcept {
+ return static_cast<size_t>(Detail::NUM_ARGS[static_cast<size_t>(op)]);
+}
+
+/// Get the required type of an argument of an opcode
+[[nodiscard]] inline Type ArgTypeOf(Opcode op, size_t arg_index) noexcept {
+ return Detail::META_TABLE[static_cast<size_t>(op)].arg_types[arg_index];
+}
+
+/// Get the name of an opcode
+[[nodiscard]] std::string_view NameOf(Opcode op);
+
+} // namespace Shader::IR
+
+template <>
+struct fmt::formatter<Shader::IR::Opcode> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::IR::Opcode& op, FormatContext& ctx) {
+ return format_to(ctx.out(), "{}", Shader::IR::NameOf(op));
+ }
+};
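Each OPCODE(...) line in opcodes.inc is expanded three times: into the Opcode enum, into META_TABLE, and into NUM_ARGS. A small sketch of the resulting queries for one entry, assuming the header and its common/ dependencies build standalone:

#include <cassert>

#include "shader_recompiler/frontend/ir/opcodes.h"

int main() {
    using namespace Shader::IR;
    // OPCODE(IAdd32, U32, U32, U32, ) declares a U32 result and two U32 arguments;
    // CalculateNumArgsOf counts arg_types entries up to the first Type::Void slot.
    assert(TypeOf(Opcode::IAdd32) == Type::U32);
    assert(NumArgsOf(Opcode::IAdd32) == 2);
    assert(ArgTypeOf(Opcode::IAdd32, 0) == Type::U32);
}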
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
new file mode 100644
index 000000000..d91098c80
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -0,0 +1,550 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, arg5 type, ...
+OPCODE(Phi, Opaque, )
+OPCODE(Identity, Opaque, Opaque, )
+OPCODE(Void, Void, )
+OPCODE(ConditionRef, U1, U1, )
+OPCODE(Reference, Void, Opaque, )
+OPCODE(PhiMove, Void, Opaque, Opaque, )
+
+// Special operations
+OPCODE(Prologue, Void, )
+OPCODE(Epilogue, Void, )
+OPCODE(Join, Void, )
+OPCODE(DemoteToHelperInvocation, Void, )
+OPCODE(EmitVertex, Void, U32, )
+OPCODE(EndPrimitive, Void, U32, )
+
+// Barriers
+OPCODE(Barrier, Void, )
+OPCODE(WorkgroupMemoryBarrier, Void, )
+OPCODE(DeviceMemoryBarrier, Void, )
+
+// Context getters/setters
+OPCODE(GetRegister, U32, Reg, )
+OPCODE(SetRegister, Void, Reg, U32, )
+OPCODE(GetPred, U1, Pred, )
+OPCODE(SetPred, Void, Pred, U1, )
+OPCODE(GetGotoVariable, U1, U32, )
+OPCODE(SetGotoVariable, Void, U32, U1, )
+OPCODE(GetIndirectBranchVariable, U32, )
+OPCODE(SetIndirectBranchVariable, Void, U32, )
+OPCODE(GetCbufU8, U32, U32, U32, )
+OPCODE(GetCbufS8, U32, U32, U32, )
+OPCODE(GetCbufU16, U32, U32, U32, )
+OPCODE(GetCbufS16, U32, U32, U32, )
+OPCODE(GetCbufU32, U32, U32, U32, )
+OPCODE(GetCbufF32, F32, U32, U32, )
+OPCODE(GetCbufU32x2, U32x2, U32, U32, )
+OPCODE(GetAttribute, F32, Attribute, U32, )
+OPCODE(SetAttribute, Void, Attribute, F32, U32, )
+OPCODE(GetAttributeIndexed, F32, U32, U32, )
+OPCODE(SetAttributeIndexed, Void, U32, F32, U32, )
+OPCODE(GetPatch, F32, Patch, )
+OPCODE(SetPatch, Void, Patch, F32, )
+OPCODE(SetFragColor, Void, U32, U32, F32, )
+OPCODE(SetSampleMask, Void, U32, )
+OPCODE(SetFragDepth, Void, F32, )
+OPCODE(GetZFlag, U1, Void, )
+OPCODE(GetSFlag, U1, Void, )
+OPCODE(GetCFlag, U1, Void, )
+OPCODE(GetOFlag, U1, Void, )
+OPCODE(SetZFlag, Void, U1, )
+OPCODE(SetSFlag, Void, U1, )
+OPCODE(SetCFlag, Void, U1, )
+OPCODE(SetOFlag, Void, U1, )
+OPCODE(WorkgroupId, U32x3, )
+OPCODE(LocalInvocationId, U32x3, )
+OPCODE(InvocationId, U32, )
+OPCODE(SampleId, U32, )
+OPCODE(IsHelperInvocation, U1, )
+OPCODE(YDirection, F32, )
+
+// Undefined
+OPCODE(UndefU1, U1, )
+OPCODE(UndefU8, U8, )
+OPCODE(UndefU16, U16, )
+OPCODE(UndefU32, U32, )
+OPCODE(UndefU64, U64, )
+
+// Memory operations
+OPCODE(LoadGlobalU8, U32, Opaque, )
+OPCODE(LoadGlobalS8, U32, Opaque, )
+OPCODE(LoadGlobalU16, U32, Opaque, )
+OPCODE(LoadGlobalS16, U32, Opaque, )
+OPCODE(LoadGlobal32, U32, Opaque, )
+OPCODE(LoadGlobal64, U32x2, Opaque, )
+OPCODE(LoadGlobal128, U32x4, Opaque, )
+OPCODE(WriteGlobalU8, Void, Opaque, U32, )
+OPCODE(WriteGlobalS8, Void, Opaque, U32, )
+OPCODE(WriteGlobalU16, Void, Opaque, U32, )
+OPCODE(WriteGlobalS16, Void, Opaque, U32, )
+OPCODE(WriteGlobal32, Void, Opaque, U32, )
+OPCODE(WriteGlobal64, Void, Opaque, U32x2, )
+OPCODE(WriteGlobal128, Void, Opaque, U32x4, )
+
+// Storage buffer operations
+OPCODE(LoadStorageU8, U32, U32, U32, )
+OPCODE(LoadStorageS8, U32, U32, U32, )
+OPCODE(LoadStorageU16, U32, U32, U32, )
+OPCODE(LoadStorageS16, U32, U32, U32, )
+OPCODE(LoadStorage32, U32, U32, U32, )
+OPCODE(LoadStorage64, U32x2, U32, U32, )
+OPCODE(LoadStorage128, U32x4, U32, U32, )
+OPCODE(WriteStorageU8, Void, U32, U32, U32, )
+OPCODE(WriteStorageS8, Void, U32, U32, U32, )
+OPCODE(WriteStorageU16, Void, U32, U32, U32, )
+OPCODE(WriteStorageS16, Void, U32, U32, U32, )
+OPCODE(WriteStorage32, Void, U32, U32, U32, )
+OPCODE(WriteStorage64, Void, U32, U32, U32x2, )
+OPCODE(WriteStorage128, Void, U32, U32, U32x4, )
+
+// Local memory operations
+OPCODE(LoadLocal, U32, U32, )
+OPCODE(WriteLocal, Void, U32, U32, )
+
+// Shared memory operations
+OPCODE(LoadSharedU8, U32, U32, )
+OPCODE(LoadSharedS8, U32, U32, )
+OPCODE(LoadSharedU16, U32, U32, )
+OPCODE(LoadSharedS16, U32, U32, )
+OPCODE(LoadSharedU32, U32, U32, )
+OPCODE(LoadSharedU64, U32x2, U32, )
+OPCODE(LoadSharedU128, U32x4, U32, )
+OPCODE(WriteSharedU8, Void, U32, U32, )
+OPCODE(WriteSharedU16, Void, U32, U32, )
+OPCODE(WriteSharedU32, Void, U32, U32, )
+OPCODE(WriteSharedU64, Void, U32, U32x2, )
+OPCODE(WriteSharedU128, Void, U32, U32x4, )
+
+// Vector utility
+OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
+OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, )
+OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, )
+OPCODE(CompositeExtractU32x2, U32, U32x2, U32, )
+OPCODE(CompositeExtractU32x3, U32, U32x3, U32, )
+OPCODE(CompositeExtractU32x4, U32, U32x4, U32, )
+OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, )
+OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, )
+OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, )
+OPCODE(CompositeConstructF16x2, F16x2, F16, F16, )
+OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, )
+OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, )
+OPCODE(CompositeExtractF16x2, F16, F16x2, U32, )
+OPCODE(CompositeExtractF16x3, F16, F16x3, U32, )
+OPCODE(CompositeExtractF16x4, F16, F16x4, U32, )
+OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, )
+OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, )
+OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, )
+OPCODE(CompositeConstructF32x2, F32x2, F32, F32, )
+OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, )
+OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, )
+OPCODE(CompositeExtractF32x2, F32, F32x2, U32, )
+OPCODE(CompositeExtractF32x3, F32, F32x3, U32, )
+OPCODE(CompositeExtractF32x4, F32, F32x4, U32, )
+OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, )
+OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, )
+OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, )
+OPCODE(CompositeConstructF64x2, F64x2, F64, F64, )
+OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, )
+OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, )
+OPCODE(CompositeExtractF64x2, F64, F64x2, U32, )
+OPCODE(CompositeExtractF64x3, F64, F64x3, U32, )
+OPCODE(CompositeExtractF64x4, F64, F64x4, U32, )
+OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, )
+OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, )
+OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, )
+
+// Select operations
+OPCODE(SelectU1, U1, U1, U1, U1, )
+OPCODE(SelectU8, U8, U1, U8, U8, )
+OPCODE(SelectU16, U16, U1, U16, U16, )
+OPCODE(SelectU32, U32, U1, U32, U32, )
+OPCODE(SelectU64, U64, U1, U64, U64, )
+OPCODE(SelectF16, F16, U1, F16, F16, )
+OPCODE(SelectF32, F32, U1, F32, F32, )
+OPCODE(SelectF64, F64, U1, F64, F64, )
+
+// Bitwise conversions
+OPCODE(BitCastU16F16, U16, F16, )
+OPCODE(BitCastU32F32, U32, F32, )
+OPCODE(BitCastU64F64, U64, F64, )
+OPCODE(BitCastF16U16, F16, U16, )
+OPCODE(BitCastF32U32, F32, U32, )
+OPCODE(BitCastF64U64, F64, U64, )
+OPCODE(PackUint2x32, U64, U32x2, )
+OPCODE(UnpackUint2x32, U32x2, U64, )
+OPCODE(PackFloat2x16, U32, F16x2, )
+OPCODE(UnpackFloat2x16, F16x2, U32, )
+OPCODE(PackHalf2x16, U32, F32x2, )
+OPCODE(UnpackHalf2x16, F32x2, U32, )
+OPCODE(PackDouble2x32, F64, U32x2, )
+OPCODE(UnpackDouble2x32, U32x2, F64, )
+
+// Pseudo-operation, handled specially at final emit
+OPCODE(GetZeroFromOp, U1, Opaque, )
+OPCODE(GetSignFromOp, U1, Opaque, )
+OPCODE(GetCarryFromOp, U1, Opaque, )
+OPCODE(GetOverflowFromOp, U1, Opaque, )
+OPCODE(GetSparseFromOp, U1, Opaque, )
+OPCODE(GetInBoundsFromOp, U1, Opaque, )
+
+// Floating-point operations
+OPCODE(FPAbs16, F16, F16, )
+OPCODE(FPAbs32, F32, F32, )
+OPCODE(FPAbs64, F64, F64, )
+OPCODE(FPAdd16, F16, F16, F16, )
+OPCODE(FPAdd32, F32, F32, F32, )
+OPCODE(FPAdd64, F64, F64, F64, )
+OPCODE(FPFma16, F16, F16, F16, F16, )
+OPCODE(FPFma32, F32, F32, F32, F32, )
+OPCODE(FPFma64, F64, F64, F64, F64, )
+OPCODE(FPMax32, F32, F32, F32, )
+OPCODE(FPMax64, F64, F64, F64, )
+OPCODE(FPMin32, F32, F32, F32, )
+OPCODE(FPMin64, F64, F64, F64, )
+OPCODE(FPMul16, F16, F16, F16, )
+OPCODE(FPMul32, F32, F32, F32, )
+OPCODE(FPMul64, F64, F64, F64, )
+OPCODE(FPNeg16, F16, F16, )
+OPCODE(FPNeg32, F32, F32, )
+OPCODE(FPNeg64, F64, F64, )
+OPCODE(FPRecip32, F32, F32, )
+OPCODE(FPRecip64, F64, F64, )
+OPCODE(FPRecipSqrt32, F32, F32, )
+OPCODE(FPRecipSqrt64, F64, F64, )
+OPCODE(FPSqrt, F32, F32, )
+OPCODE(FPSin, F32, F32, )
+OPCODE(FPExp2, F32, F32, )
+OPCODE(FPCos, F32, F32, )
+OPCODE(FPLog2, F32, F32, )
+OPCODE(FPSaturate16, F16, F16, )
+OPCODE(FPSaturate32, F32, F32, )
+OPCODE(FPSaturate64, F64, F64, )
+OPCODE(FPClamp16, F16, F16, F16, F16, )
+OPCODE(FPClamp32, F32, F32, F32, F32, )
+OPCODE(FPClamp64, F64, F64, F64, F64, )
+OPCODE(FPRoundEven16, F16, F16, )
+OPCODE(FPRoundEven32, F32, F32, )
+OPCODE(FPRoundEven64, F64, F64, )
+OPCODE(FPFloor16, F16, F16, )
+OPCODE(FPFloor32, F32, F32, )
+OPCODE(FPFloor64, F64, F64, )
+OPCODE(FPCeil16, F16, F16, )
+OPCODE(FPCeil32, F32, F32, )
+OPCODE(FPCeil64, F64, F64, )
+OPCODE(FPTrunc16, F16, F16, )
+OPCODE(FPTrunc32, F32, F32, )
+OPCODE(FPTrunc64, F64, F64, )
+
+OPCODE(FPOrdEqual16, U1, F16, F16, )
+OPCODE(FPOrdEqual32, U1, F32, F32, )
+OPCODE(FPOrdEqual64, U1, F64, F64, )
+OPCODE(FPUnordEqual16, U1, F16, F16, )
+OPCODE(FPUnordEqual32, U1, F32, F32, )
+OPCODE(FPUnordEqual64, U1, F64, F64, )
+OPCODE(FPOrdNotEqual16, U1, F16, F16, )
+OPCODE(FPOrdNotEqual32, U1, F32, F32, )
+OPCODE(FPOrdNotEqual64, U1, F64, F64, )
+OPCODE(FPUnordNotEqual16, U1, F16, F16, )
+OPCODE(FPUnordNotEqual32, U1, F32, F32, )
+OPCODE(FPUnordNotEqual64, U1, F64, F64, )
+OPCODE(FPOrdLessThan16, U1, F16, F16, )
+OPCODE(FPOrdLessThan32, U1, F32, F32, )
+OPCODE(FPOrdLessThan64, U1, F64, F64, )
+OPCODE(FPUnordLessThan16, U1, F16, F16, )
+OPCODE(FPUnordLessThan32, U1, F32, F32, )
+OPCODE(FPUnordLessThan64, U1, F64, F64, )
+OPCODE(FPOrdGreaterThan16, U1, F16, F16, )
+OPCODE(FPOrdGreaterThan32, U1, F32, F32, )
+OPCODE(FPOrdGreaterThan64, U1, F64, F64, )
+OPCODE(FPUnordGreaterThan16, U1, F16, F16, )
+OPCODE(FPUnordGreaterThan32, U1, F32, F32, )
+OPCODE(FPUnordGreaterThan64, U1, F64, F64, )
+OPCODE(FPOrdLessThanEqual16, U1, F16, F16, )
+OPCODE(FPOrdLessThanEqual32, U1, F32, F32, )
+OPCODE(FPOrdLessThanEqual64, U1, F64, F64, )
+OPCODE(FPUnordLessThanEqual16, U1, F16, F16, )
+OPCODE(FPUnordLessThanEqual32, U1, F32, F32, )
+OPCODE(FPUnordLessThanEqual64, U1, F64, F64, )
+OPCODE(FPOrdGreaterThanEqual16, U1, F16, F16, )
+OPCODE(FPOrdGreaterThanEqual32, U1, F32, F32, )
+OPCODE(FPOrdGreaterThanEqual64, U1, F64, F64, )
+OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, )
+OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, )
+OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, )
+OPCODE(FPIsNan16, U1, F16, )
+OPCODE(FPIsNan32, U1, F32, )
+OPCODE(FPIsNan64, U1, F64, )
+
+// Integer operations
+OPCODE(IAdd32, U32, U32, U32, )
+OPCODE(IAdd64, U64, U64, U64, )
+OPCODE(ISub32, U32, U32, U32, )
+OPCODE(ISub64, U64, U64, U64, )
+OPCODE(IMul32, U32, U32, U32, )
+OPCODE(INeg32, U32, U32, )
+OPCODE(INeg64, U64, U64, )
+OPCODE(IAbs32, U32, U32, )
+OPCODE(ShiftLeftLogical32, U32, U32, U32, )
+OPCODE(ShiftLeftLogical64, U64, U64, U32, )
+OPCODE(ShiftRightLogical32, U32, U32, U32, )
+OPCODE(ShiftRightLogical64, U64, U64, U32, )
+OPCODE(ShiftRightArithmetic32, U32, U32, U32, )
+OPCODE(ShiftRightArithmetic64, U64, U64, U32, )
+OPCODE(BitwiseAnd32, U32, U32, U32, )
+OPCODE(BitwiseOr32, U32, U32, U32, )
+OPCODE(BitwiseXor32, U32, U32, U32, )
+OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, )
+OPCODE(BitFieldSExtract, U32, U32, U32, U32, )
+OPCODE(BitFieldUExtract, U32, U32, U32, U32, )
+OPCODE(BitReverse32, U32, U32, )
+OPCODE(BitCount32, U32, U32, )
+OPCODE(BitwiseNot32, U32, U32, )
+
+OPCODE(FindSMsb32, U32, U32, )
+OPCODE(FindUMsb32, U32, U32, )
+OPCODE(SMin32, U32, U32, U32, )
+OPCODE(UMin32, U32, U32, U32, )
+OPCODE(SMax32, U32, U32, U32, )
+OPCODE(UMax32, U32, U32, U32, )
+OPCODE(SClamp32, U32, U32, U32, U32, )
+OPCODE(UClamp32, U32, U32, U32, U32, )
+OPCODE(SLessThan, U1, U32, U32, )
+OPCODE(ULessThan, U1, U32, U32, )
+OPCODE(IEqual, U1, U32, U32, )
+OPCODE(SLessThanEqual, U1, U32, U32, )
+OPCODE(ULessThanEqual, U1, U32, U32, )
+OPCODE(SGreaterThan, U1, U32, U32, )
+OPCODE(UGreaterThan, U1, U32, U32, )
+OPCODE(INotEqual, U1, U32, U32, )
+OPCODE(SGreaterThanEqual, U1, U32, U32, )
+OPCODE(UGreaterThanEqual, U1, U32, U32, )
+
+// Atomic operations
+OPCODE(SharedAtomicIAdd32, U32, U32, U32, )
+OPCODE(SharedAtomicSMin32, U32, U32, U32, )
+OPCODE(SharedAtomicUMin32, U32, U32, U32, )
+OPCODE(SharedAtomicSMax32, U32, U32, U32, )
+OPCODE(SharedAtomicUMax32, U32, U32, U32, )
+OPCODE(SharedAtomicInc32, U32, U32, U32, )
+OPCODE(SharedAtomicDec32, U32, U32, U32, )
+OPCODE(SharedAtomicAnd32, U32, U32, U32, )
+OPCODE(SharedAtomicOr32, U32, U32, U32, )
+OPCODE(SharedAtomicXor32, U32, U32, U32, )
+OPCODE(SharedAtomicExchange32, U32, U32, U32, )
+OPCODE(SharedAtomicExchange64, U64, U32, U64, )
+
+OPCODE(GlobalAtomicIAdd32, U32, U64, U32, )
+OPCODE(GlobalAtomicSMin32, U32, U64, U32, )
+OPCODE(GlobalAtomicUMin32, U32, U64, U32, )
+OPCODE(GlobalAtomicSMax32, U32, U64, U32, )
+OPCODE(GlobalAtomicUMax32, U32, U64, U32, )
+OPCODE(GlobalAtomicInc32, U32, U64, U32, )
+OPCODE(GlobalAtomicDec32, U32, U64, U32, )
+OPCODE(GlobalAtomicAnd32, U32, U64, U32, )
+OPCODE(GlobalAtomicOr32, U32, U64, U32, )
+OPCODE(GlobalAtomicXor32, U32, U64, U32, )
+OPCODE(GlobalAtomicExchange32, U32, U64, U32, )
+OPCODE(GlobalAtomicIAdd64, U64, U64, U64, )
+OPCODE(GlobalAtomicSMin64, U64, U64, U64, )
+OPCODE(GlobalAtomicUMin64, U64, U64, U64, )
+OPCODE(GlobalAtomicSMax64, U64, U64, U64, )
+OPCODE(GlobalAtomicUMax64, U64, U64, U64, )
+OPCODE(GlobalAtomicAnd64, U64, U64, U64, )
+OPCODE(GlobalAtomicOr64, U64, U64, U64, )
+OPCODE(GlobalAtomicXor64, U64, U64, U64, )
+OPCODE(GlobalAtomicExchange64, U64, U64, U64, )
+OPCODE(GlobalAtomicAddF32, F32, U64, F32, )
+OPCODE(GlobalAtomicAddF16x2, U32, U64, F16x2, )
+OPCODE(GlobalAtomicAddF32x2, U32, U64, F32x2, )
+OPCODE(GlobalAtomicMinF16x2, U32, U64, F16x2, )
+OPCODE(GlobalAtomicMinF32x2, U32, U64, F32x2, )
+OPCODE(GlobalAtomicMaxF16x2, U32, U64, F16x2, )
+OPCODE(GlobalAtomicMaxF32x2, U32, U64, F32x2, )
+
+OPCODE(StorageAtomicIAdd32, U32, U32, U32, U32, )
+OPCODE(StorageAtomicSMin32, U32, U32, U32, U32, )
+OPCODE(StorageAtomicUMin32, U32, U32, U32, U32, )
+OPCODE(StorageAtomicSMax32, U32, U32, U32, U32, )
+OPCODE(StorageAtomicUMax32, U32, U32, U32, U32, )
+OPCODE(StorageAtomicInc32, U32, U32, U32, U32, )
+OPCODE(StorageAtomicDec32, U32, U32, U32, U32, )
+OPCODE(StorageAtomicAnd32, U32, U32, U32, U32, )
+OPCODE(StorageAtomicOr32, U32, U32, U32, U32, )
+OPCODE(StorageAtomicXor32, U32, U32, U32, U32, )
+OPCODE(StorageAtomicExchange32, U32, U32, U32, U32, )
+OPCODE(StorageAtomicIAdd64, U64, U32, U32, U64, )
+OPCODE(StorageAtomicSMin64, U64, U32, U32, U64, )
+OPCODE(StorageAtomicUMin64, U64, U32, U32, U64, )
+OPCODE(StorageAtomicSMax64, U64, U32, U32, U64, )
+OPCODE(StorageAtomicUMax64, U64, U32, U32, U64, )
+OPCODE(StorageAtomicAnd64, U64, U32, U32, U64, )
+OPCODE(StorageAtomicOr64, U64, U32, U32, U64, )
+OPCODE(StorageAtomicXor64, U64, U32, U32, U64, )
+OPCODE(StorageAtomicExchange64, U64, U32, U32, U64, )
+OPCODE(StorageAtomicAddF32, F32, U32, U32, F32, )
+OPCODE(StorageAtomicAddF16x2, U32, U32, U32, F16x2, )
+OPCODE(StorageAtomicAddF32x2, U32, U32, U32, F32x2, )
+OPCODE(StorageAtomicMinF16x2, U32, U32, U32, F16x2, )
+OPCODE(StorageAtomicMinF32x2, U32, U32, U32, F32x2, )
+OPCODE(StorageAtomicMaxF16x2, U32, U32, U32, F16x2, )
+OPCODE(StorageAtomicMaxF32x2, U32, U32, U32, F32x2, )
+
+// Logical operations
+OPCODE(LogicalOr, U1, U1, U1, )
+OPCODE(LogicalAnd, U1, U1, U1, )
+OPCODE(LogicalXor, U1, U1, U1, )
+OPCODE(LogicalNot, U1, U1, )
+
+// Conversion operations
+OPCODE(ConvertS16F16, U32, F16, )
+OPCODE(ConvertS16F32, U32, F32, )
+OPCODE(ConvertS16F64, U32, F64, )
+OPCODE(ConvertS32F16, U32, F16, )
+OPCODE(ConvertS32F32, U32, F32, )
+OPCODE(ConvertS32F64, U32, F64, )
+OPCODE(ConvertS64F16, U64, F16, )
+OPCODE(ConvertS64F32, U64, F32, )
+OPCODE(ConvertS64F64, U64, F64, )
+OPCODE(ConvertU16F16, U32, F16, )
+OPCODE(ConvertU16F32, U32, F32, )
+OPCODE(ConvertU16F64, U32, F64, )
+OPCODE(ConvertU32F16, U32, F16, )
+OPCODE(ConvertU32F32, U32, F32, )
+OPCODE(ConvertU32F64, U32, F64, )
+OPCODE(ConvertU64F16, U64, F16, )
+OPCODE(ConvertU64F32, U64, F32, )
+OPCODE(ConvertU64F64, U64, F64, )
+OPCODE(ConvertU64U32, U64, U32, )
+OPCODE(ConvertU32U64, U32, U64, )
+OPCODE(ConvertF16F32, F16, F32, )
+OPCODE(ConvertF32F16, F32, F16, )
+OPCODE(ConvertF32F64, F32, F64, )
+OPCODE(ConvertF64F32, F64, F32, )
+OPCODE(ConvertF16S8, F16, U32, )
+OPCODE(ConvertF16S16, F16, U32, )
+OPCODE(ConvertF16S32, F16, U32, )
+OPCODE(ConvertF16S64, F16, U64, )
+OPCODE(ConvertF16U8, F16, U32, )
+OPCODE(ConvertF16U16, F16, U32, )
+OPCODE(ConvertF16U32, F16, U32, )
+OPCODE(ConvertF16U64, F16, U64, )
+OPCODE(ConvertF32S8, F32, U32, )
+OPCODE(ConvertF32S16, F32, U32, )
+OPCODE(ConvertF32S32, F32, U32, )
+OPCODE(ConvertF32S64, F32, U64, )
+OPCODE(ConvertF32U8, F32, U32, )
+OPCODE(ConvertF32U16, F32, U32, )
+OPCODE(ConvertF32U32, F32, U32, )
+OPCODE(ConvertF32U64, F32, U64, )
+OPCODE(ConvertF64S8, F64, U32, )
+OPCODE(ConvertF64S16, F64, U32, )
+OPCODE(ConvertF64S32, F64, U32, )
+OPCODE(ConvertF64S64, F64, U64, )
+OPCODE(ConvertF64U8, F64, U32, )
+OPCODE(ConvertF64U16, F64, U32, )
+OPCODE(ConvertF64U32, F64, U32, )
+OPCODE(ConvertF64U64, F64, U64, )
+
+// Image operations
+OPCODE(BindlessImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
+OPCODE(BindlessImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
+OPCODE(BindlessImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
+OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
+OPCODE(BindlessImageGather, F32x4, U32, Opaque, Opaque, Opaque, )
+OPCODE(BindlessImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, )
+OPCODE(BindlessImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, )
+OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, )
+OPCODE(BindlessImageQueryLod, F32x4, U32, Opaque, )
+OPCODE(BindlessImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, )
+OPCODE(BindlessImageRead, U32x4, U32, Opaque, )
+OPCODE(BindlessImageWrite, Void, U32, Opaque, U32x4, )
+
+OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
+OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
+OPCODE(BoundImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
+OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
+OPCODE(BoundImageGather, F32x4, U32, Opaque, Opaque, Opaque, )
+OPCODE(BoundImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, )
+OPCODE(BoundImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, )
+OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, )
+OPCODE(BoundImageQueryLod, F32x4, U32, Opaque, )
+OPCODE(BoundImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, )
+OPCODE(BoundImageRead, U32x4, U32, Opaque, )
+OPCODE(BoundImageWrite, Void, U32, Opaque, U32x4, )
+
+OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, )
+OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, )
+OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, )
+OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, )
+OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, Opaque, )
+OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, Opaque, F32, )
+OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, )
+OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, )
+OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, )
+OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, )
+OPCODE(ImageRead, U32x4, Opaque, Opaque, )
+OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, )
+
+// Atomic Image operations
+
+OPCODE(BindlessImageAtomicIAdd32, U32, U32, Opaque, U32, )
+OPCODE(BindlessImageAtomicSMin32, U32, U32, Opaque, U32, )
+OPCODE(BindlessImageAtomicUMin32, U32, U32, Opaque, U32, )
+OPCODE(BindlessImageAtomicSMax32, U32, U32, Opaque, U32, )
+OPCODE(BindlessImageAtomicUMax32, U32, U32, Opaque, U32, )
+OPCODE(BindlessImageAtomicInc32, U32, U32, Opaque, U32, )
+OPCODE(BindlessImageAtomicDec32, U32, U32, Opaque, U32, )
+OPCODE(BindlessImageAtomicAnd32, U32, U32, Opaque, U32, )
+OPCODE(BindlessImageAtomicOr32, U32, U32, Opaque, U32, )
+OPCODE(BindlessImageAtomicXor32, U32, U32, Opaque, U32, )
+OPCODE(BindlessImageAtomicExchange32, U32, U32, Opaque, U32, )
+
+OPCODE(BoundImageAtomicIAdd32, U32, U32, Opaque, U32, )
+OPCODE(BoundImageAtomicSMin32, U32, U32, Opaque, U32, )
+OPCODE(BoundImageAtomicUMin32, U32, U32, Opaque, U32, )
+OPCODE(BoundImageAtomicSMax32, U32, U32, Opaque, U32, )
+OPCODE(BoundImageAtomicUMax32, U32, U32, Opaque, U32, )
+OPCODE(BoundImageAtomicInc32, U32, U32, Opaque, U32, )
+OPCODE(BoundImageAtomicDec32, U32, U32, Opaque, U32, )
+OPCODE(BoundImageAtomicAnd32, U32, U32, Opaque, U32, )
+OPCODE(BoundImageAtomicOr32, U32, U32, Opaque, U32, )
+OPCODE(BoundImageAtomicXor32, U32, U32, Opaque, U32, )
+OPCODE(BoundImageAtomicExchange32, U32, U32, Opaque, U32, )
+
+OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicSMin32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicOr32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, )
+
+// Warp operations
+OPCODE(LaneId, U32, )
+OPCODE(VoteAll, U1, U1, )
+OPCODE(VoteAny, U1, U1, )
+OPCODE(VoteEqual, U1, U1, )
+OPCODE(SubgroupBallot, U32, U1, )
+OPCODE(SubgroupEqMask, U32, )
+OPCODE(SubgroupLtMask, U32, )
+OPCODE(SubgroupLeMask, U32, )
+OPCODE(SubgroupGtMask, U32, )
+OPCODE(SubgroupGeMask, U32, )
+OPCODE(ShuffleIndex, U32, U32, U32, U32, U32, )
+OPCODE(ShuffleUp, U32, U32, U32, U32, U32, )
+OPCODE(ShuffleDown, U32, U32, U32, U32, U32, )
+OPCODE(ShuffleButterfly, U32, U32, U32, U32, U32, )
+OPCODE(FSwizzleAdd, F32, F32, F32, U32, )
+OPCODE(DPdxFine, F32, F32, )
+OPCODE(DPdyFine, F32, F32, )
+OPCODE(DPdxCoarse, F32, F32, )
+OPCODE(DPdyCoarse, F32, F32, )
diff --git a/src/shader_recompiler/frontend/ir/patch.cpp b/src/shader_recompiler/frontend/ir/patch.cpp
new file mode 100644
index 000000000..4c956a970
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/patch.cpp
@@ -0,0 +1,28 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/patch.h"
+
+namespace Shader::IR {
+
+bool IsGeneric(Patch patch) noexcept {
+ return patch >= Patch::Component0 && patch <= Patch::Component119;
+}
+
+u32 GenericPatchIndex(Patch patch) {
+ if (!IsGeneric(patch)) {
+ throw InvalidArgument("Patch {} is not generic", patch);
+ }
+ return (static_cast<u32>(patch) - static_cast<u32>(Patch::Component0)) / 4;
+}
+
+u32 GenericPatchElement(Patch patch) {
+ if (!IsGeneric(patch)) {
+ throw InvalidArgument("Patch {} is not generic", patch);
+ }
+ return (static_cast<u32>(patch) - static_cast<u32>(Patch::Component0)) % 4;
+}
+
+} // namespace Shader::IR
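Generic patch attributes start at Component0 and are grouped four components per attribute, so the index/element split is a plain divide and modulo. A worked check, assuming patch.cpp and the exception support are linked in:

#include <cassert>

#include "shader_recompiler/frontend/ir/patch.h"

int main() {
    using namespace Shader::IR;
    // Component5 is element 1 of generic patch attribute 1: (5 / 4, 5 % 4).
    assert(IsGeneric(Patch::Component5));
    assert(GenericPatchIndex(Patch::Component5) == 1);
    assert(GenericPatchElement(Patch::Component5) == 1);
    // Tessellation levels come before Component0 and are not generic.
    assert(!IsGeneric(Patch::TessellationLodLeft));
}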
diff --git a/src/shader_recompiler/frontend/ir/patch.h b/src/shader_recompiler/frontend/ir/patch.h
new file mode 100644
index 000000000..6d66ff0d6
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/patch.h
@@ -0,0 +1,149 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Shader::IR {
+
+enum class Patch : u64 {
+ TessellationLodLeft,
+ TessellationLodTop,
+ TessellationLodRight,
+ TessellationLodBottom,
+ TessellationLodInteriorU,
+ TessellationLodInteriorV,
+ ComponentPadding0,
+ ComponentPadding1,
+ Component0,
+ Component1,
+ Component2,
+ Component3,
+ Component4,
+ Component5,
+ Component6,
+ Component7,
+ Component8,
+ Component9,
+ Component10,
+ Component11,
+ Component12,
+ Component13,
+ Component14,
+ Component15,
+ Component16,
+ Component17,
+ Component18,
+ Component19,
+ Component20,
+ Component21,
+ Component22,
+ Component23,
+ Component24,
+ Component25,
+ Component26,
+ Component27,
+ Component28,
+ Component29,
+ Component30,
+ Component31,
+ Component32,
+ Component33,
+ Component34,
+ Component35,
+ Component36,
+ Component37,
+ Component38,
+ Component39,
+ Component40,
+ Component41,
+ Component42,
+ Component43,
+ Component44,
+ Component45,
+ Component46,
+ Component47,
+ Component48,
+ Component49,
+ Component50,
+ Component51,
+ Component52,
+ Component53,
+ Component54,
+ Component55,
+ Component56,
+ Component57,
+ Component58,
+ Component59,
+ Component60,
+ Component61,
+ Component62,
+ Component63,
+ Component64,
+ Component65,
+ Component66,
+ Component67,
+ Component68,
+ Component69,
+ Component70,
+ Component71,
+ Component72,
+ Component73,
+ Component74,
+ Component75,
+ Component76,
+ Component77,
+ Component78,
+ Component79,
+ Component80,
+ Component81,
+ Component82,
+ Component83,
+ Component84,
+ Component85,
+ Component86,
+ Component87,
+ Component88,
+ Component89,
+ Component90,
+ Component91,
+ Component92,
+ Component93,
+ Component94,
+ Component95,
+ Component96,
+ Component97,
+ Component98,
+ Component99,
+ Component100,
+ Component101,
+ Component102,
+ Component103,
+ Component104,
+ Component105,
+ Component106,
+ Component107,
+ Component108,
+ Component109,
+ Component110,
+ Component111,
+ Component112,
+ Component113,
+ Component114,
+ Component115,
+ Component116,
+ Component117,
+ Component118,
+ Component119,
+};
+static_assert(static_cast<u64>(Patch::Component119) == 127);
+
+[[nodiscard]] bool IsGeneric(Patch patch) noexcept;
+
+[[nodiscard]] u32 GenericPatchIndex(Patch patch);
+
+[[nodiscard]] u32 GenericPatchElement(Patch patch);
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/post_order.cpp b/src/shader_recompiler/frontend/ir/post_order.cpp
new file mode 100644
index 000000000..16bc44101
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/post_order.cpp
@@ -0,0 +1,46 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+
+#include <boost/container/flat_set.hpp>
+#include <boost/container/small_vector.hpp>
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/post_order.h"
+
+namespace Shader::IR {
+
+BlockList PostOrder(const AbstractSyntaxNode& root) {
+ boost::container::small_vector<Block*, 16> block_stack;
+ boost::container::flat_set<Block*> visited;
+ BlockList post_order_blocks;
+
+ if (root.type != AbstractSyntaxNode::Type::Block) {
+ throw LogicError("First node in abstract syntax list root is not a block");
+ }
+ Block* const first_block{root.data.block};
+ visited.insert(first_block);
+ block_stack.push_back(first_block);
+
+ while (!block_stack.empty()) {
+ Block* const block{block_stack.back()};
+ const auto visit{[&](Block* branch) {
+ if (!visited.insert(branch).second) {
+ return false;
+ }
+ // Calling push_back twice is faster than insert on MSVC
+ block_stack.push_back(block);
+ block_stack.push_back(branch);
+ return true;
+ }};
+ block_stack.pop_back();
+ if (std::ranges::none_of(block->ImmSuccessors(), visit)) {
+ post_order_blocks.push_back(block);
+ }
+ }
+ return post_order_blocks;
+}
+
+} // namespace Shader::IR
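The traversal avoids recursion by re-pushing the current block before an unvisited successor, so a block is emitted only after all of its successors. The same trick on a toy four-node graph, as a standalone sketch that is not part of the patch:

#include <algorithm>
#include <cstdio>
#include <set>
#include <vector>

int main() {
    // A=0 -> {1, 2}, B=1 -> {3}, C=2 -> {3}, D=3 -> {}
    const std::vector<std::vector<int>> succ{{1, 2}, {3}, {3}, {}};
    std::vector<int> stack{0};
    std::set<int> visited{0};
    std::vector<int> post_order;
    while (!stack.empty()) {
        const int node{stack.back()};
        stack.pop_back();
        const auto visit{[&](int next) {
            if (!visited.insert(next).second) {
                return false;
            }
            stack.push_back(node); // revisit the parent after the child is done
            stack.push_back(next);
            return true;
        }};
        if (std::ranges::none_of(succ[node], visit)) {
            post_order.push_back(node);
        }
    }
    for (const int node : post_order) {
        std::printf("%d ", node); // prints: 3 1 2 0 (D B C A)
    }
}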
diff --git a/src/shader_recompiler/frontend/ir/post_order.h b/src/shader_recompiler/frontend/ir/post_order.h
new file mode 100644
index 000000000..07bfbadc3
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/post_order.h
@@ -0,0 +1,14 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "shader_recompiler/frontend/ir/abstract_syntax_list.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+
+namespace Shader::IR {
+
+BlockList PostOrder(const AbstractSyntaxNode& root);
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/pred.h b/src/shader_recompiler/frontend/ir/pred.h
new file mode 100644
index 000000000..4e7f32423
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/pred.h
@@ -0,0 +1,44 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <fmt/format.h>
+
+namespace Shader::IR {
+
+enum class Pred : u64 {
+ P0,
+ P1,
+ P2,
+ P3,
+ P4,
+ P5,
+ P6,
+ PT,
+};
+
+constexpr size_t NUM_USER_PREDS = 7;
+constexpr size_t NUM_PREDS = 8;
+
+[[nodiscard]] constexpr size_t PredIndex(Pred pred) noexcept {
+ return static_cast<size_t>(pred);
+}
+
+} // namespace Shader::IR
+
+template <>
+struct fmt::formatter<Shader::IR::Pred> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::IR::Pred& pred, FormatContext& ctx) {
+ if (pred == Shader::IR::Pred::PT) {
+ return fmt::format_to(ctx.out(), "PT");
+ } else {
+ return fmt::format_to(ctx.out(), "P{}", static_cast<int>(pred));
+ }
+ }
+};
diff --git a/src/shader_recompiler/frontend/ir/program.cpp b/src/shader_recompiler/frontend/ir/program.cpp
new file mode 100644
index 000000000..3fc06f855
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/program.cpp
@@ -0,0 +1,32 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <map>
+#include <string>
+
+#include <fmt/format.h>
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::IR {
+
+std::string DumpProgram(const Program& program) {
+ size_t index{0};
+ std::map<const IR::Inst*, size_t> inst_to_index;
+ std::map<const IR::Block*, size_t> block_to_index;
+
+ for (const IR::Block* const block : program.blocks) {
+ block_to_index.emplace(block, index);
+ ++index;
+ }
+ std::string ret;
+ for (const auto& block : program.blocks) {
+ ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n';
+ }
+ return ret;
+}
+
+} // namespace Shader::IR
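DumpProgram is the debugging entry point for the whole IR. A typical, purely illustrative use, assuming fmt and the IR sources are linked:

#include <fmt/format.h>

#include "shader_recompiler/frontend/ir/program.h"

// Print the textual IR of a translated program while debugging.
void DebugDump(const Shader::IR::Program& program) {
    fmt::print("{}\n", Shader::IR::DumpProgram(program));
}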
diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h
new file mode 100644
index 000000000..ebcaa8bc2
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/program.h
@@ -0,0 +1,35 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <string>
+
+#include "shader_recompiler/frontend/ir/abstract_syntax_list.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/program_header.h"
+#include "shader_recompiler/shader_info.h"
+#include "shader_recompiler/stage.h"
+
+namespace Shader::IR {
+
+struct Program {
+ AbstractSyntaxList syntax_list;
+ BlockList blocks;
+ BlockList post_order_blocks;
+ Info info;
+ Stage stage{};
+ std::array<u32, 3> workgroup_size{};
+ OutputTopology output_topology{};
+ u32 output_vertices{};
+ u32 invocations{};
+ u32 local_memory_size{};
+ u32 shared_memory_size{};
+ bool is_geometry_passthrough{};
+};
+
+[[nodiscard]] std::string DumpProgram(const Program& program);
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h
new file mode 100644
index 000000000..a4b635792
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/reg.h
@@ -0,0 +1,332 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <fmt/format.h>
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+
+namespace Shader::IR {
+
+enum class Reg : u64 {
+ R0,
+ R1,
+ R2,
+ R3,
+ R4,
+ R5,
+ R6,
+ R7,
+ R8,
+ R9,
+ R10,
+ R11,
+ R12,
+ R13,
+ R14,
+ R15,
+ R16,
+ R17,
+ R18,
+ R19,
+ R20,
+ R21,
+ R22,
+ R23,
+ R24,
+ R25,
+ R26,
+ R27,
+ R28,
+ R29,
+ R30,
+ R31,
+ R32,
+ R33,
+ R34,
+ R35,
+ R36,
+ R37,
+ R38,
+ R39,
+ R40,
+ R41,
+ R42,
+ R43,
+ R44,
+ R45,
+ R46,
+ R47,
+ R48,
+ R49,
+ R50,
+ R51,
+ R52,
+ R53,
+ R54,
+ R55,
+ R56,
+ R57,
+ R58,
+ R59,
+ R60,
+ R61,
+ R62,
+ R63,
+ R64,
+ R65,
+ R66,
+ R67,
+ R68,
+ R69,
+ R70,
+ R71,
+ R72,
+ R73,
+ R74,
+ R75,
+ R76,
+ R77,
+ R78,
+ R79,
+ R80,
+ R81,
+ R82,
+ R83,
+ R84,
+ R85,
+ R86,
+ R87,
+ R88,
+ R89,
+ R90,
+ R91,
+ R92,
+ R93,
+ R94,
+ R95,
+ R96,
+ R97,
+ R98,
+ R99,
+ R100,
+ R101,
+ R102,
+ R103,
+ R104,
+ R105,
+ R106,
+ R107,
+ R108,
+ R109,
+ R110,
+ R111,
+ R112,
+ R113,
+ R114,
+ R115,
+ R116,
+ R117,
+ R118,
+ R119,
+ R120,
+ R121,
+ R122,
+ R123,
+ R124,
+ R125,
+ R126,
+ R127,
+ R128,
+ R129,
+ R130,
+ R131,
+ R132,
+ R133,
+ R134,
+ R135,
+ R136,
+ R137,
+ R138,
+ R139,
+ R140,
+ R141,
+ R142,
+ R143,
+ R144,
+ R145,
+ R146,
+ R147,
+ R148,
+ R149,
+ R150,
+ R151,
+ R152,
+ R153,
+ R154,
+ R155,
+ R156,
+ R157,
+ R158,
+ R159,
+ R160,
+ R161,
+ R162,
+ R163,
+ R164,
+ R165,
+ R166,
+ R167,
+ R168,
+ R169,
+ R170,
+ R171,
+ R172,
+ R173,
+ R174,
+ R175,
+ R176,
+ R177,
+ R178,
+ R179,
+ R180,
+ R181,
+ R182,
+ R183,
+ R184,
+ R185,
+ R186,
+ R187,
+ R188,
+ R189,
+ R190,
+ R191,
+ R192,
+ R193,
+ R194,
+ R195,
+ R196,
+ R197,
+ R198,
+ R199,
+ R200,
+ R201,
+ R202,
+ R203,
+ R204,
+ R205,
+ R206,
+ R207,
+ R208,
+ R209,
+ R210,
+ R211,
+ R212,
+ R213,
+ R214,
+ R215,
+ R216,
+ R217,
+ R218,
+ R219,
+ R220,
+ R221,
+ R222,
+ R223,
+ R224,
+ R225,
+ R226,
+ R227,
+ R228,
+ R229,
+ R230,
+ R231,
+ R232,
+ R233,
+ R234,
+ R235,
+ R236,
+ R237,
+ R238,
+ R239,
+ R240,
+ R241,
+ R242,
+ R243,
+ R244,
+ R245,
+ R246,
+ R247,
+ R248,
+ R249,
+ R250,
+ R251,
+ R252,
+ R253,
+ R254,
+ RZ,
+};
+static_assert(static_cast<int>(Reg::RZ) == 255);
+
+constexpr size_t NUM_USER_REGS = 255;
+constexpr size_t NUM_REGS = 256;
+
+[[nodiscard]] constexpr Reg operator+(Reg reg, int num) {
+ if (reg == Reg::RZ) {
+ // Adding or subtracting registers from RZ yields RZ
+ return Reg::RZ;
+ }
+ const int result{static_cast<int>(reg) + num};
+ if (result >= static_cast<int>(Reg::RZ)) {
+ throw LogicError("Overflow on register arithmetic");
+ }
+ if (result < 0) {
+ throw LogicError("Underflow on register arithmetic");
+ }
+ return static_cast<Reg>(result);
+}
+
+[[nodiscard]] constexpr Reg operator-(Reg reg, int num) {
+ return reg + (-num);
+}
+
+constexpr Reg operator++(Reg& reg) {
+ reg = reg + 1;
+ return reg;
+}
+
+constexpr Reg operator++(Reg& reg, int) {
+ const Reg copy{reg};
+ reg = reg + 1;
+ return copy;
+}
+
+[[nodiscard]] constexpr size_t RegIndex(Reg reg) noexcept {
+ return static_cast<size_t>(reg);
+}
+
+[[nodiscard]] constexpr bool IsAligned(Reg reg, size_t align) {
+ return RegIndex(reg) % align == 0 || reg == Reg::RZ;
+}
+
+} // namespace Shader::IR
+
+template <>
+struct fmt::formatter<Shader::IR::Reg> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::IR::Reg& reg, FormatContext& ctx) {
+ if (reg == Shader::IR::Reg::RZ) {
+ return fmt::format_to(ctx.out(), "RZ");
+ } else if (static_cast<int>(reg) >= 0 && static_cast<int>(reg) < 255) {
+ return fmt::format_to(ctx.out(), "R{}", static_cast<int>(reg));
+ } else {
+ throw Shader::LogicError("Invalid register with raw value {}", static_cast<int>(reg));
+ }
+ }
+};
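A compile-time spot check of the register arithmetic rules above, assuming reg.h and its common/ includes build standalone: RZ absorbs addition and subtraction, while ordinary registers step like integers and throw on overflow or underflow.

#include "shader_recompiler/frontend/ir/reg.h"

// Ordinary registers step like integers; RZ is a fixed point of + and -.
static_assert(Shader::IR::Reg::R4 + 2 == Shader::IR::Reg::R6);
static_assert(Shader::IR::Reg::R6 - 2 == Shader::IR::Reg::R4);
static_assert(Shader::IR::Reg::RZ + 5 == Shader::IR::Reg::RZ);
// Alignment checks treat RZ as always aligned.
static_assert(Shader::IR::IsAligned(Shader::IR::Reg::R8, 4));
static_assert(Shader::IR::IsAligned(Shader::IR::Reg::RZ, 4));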
diff --git a/src/shader_recompiler/frontend/ir/type.cpp b/src/shader_recompiler/frontend/ir/type.cpp
new file mode 100644
index 000000000..f28341bfe
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/type.cpp
@@ -0,0 +1,38 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+#include <string>
+
+#include "shader_recompiler/frontend/ir/type.h"
+
+namespace Shader::IR {
+
+std::string NameOf(Type type) {
+ static constexpr std::array names{
+        "Opaque", "Reg", "Pred", "Attribute", "Patch", "U1", "U8", "U16", "U32",
+        "U64", "F16", "F32", "F64", "U32x2", "U32x3", "U32x4", "F16x2", "F16x3",
+        "F16x4", "F32x2", "F32x3", "F32x4", "F64x2", "F64x3", "F64x4",
+ };
+ const size_t bits{static_cast<size_t>(type)};
+ if (bits == 0) {
+ return "Void";
+ }
+ std::string result;
+ for (size_t i = 0; i < names.size(); i++) {
+ if ((bits & (size_t{1} << i)) != 0) {
+ if (!result.empty()) {
+ result += '|';
+ }
+ result += names[i];
+ }
+ }
+ return result;
+}
+
+bool AreTypesCompatible(Type lhs, Type rhs) noexcept {
+ return lhs == rhs || lhs == Type::Opaque || rhs == Type::Opaque;
+}
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h
new file mode 100644
index 000000000..294b230c4
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/type.h
@@ -0,0 +1,61 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+
+#include <fmt/format.h>
+
+#include "common/common_funcs.h"
+#include "shader_recompiler/exception.h"
+
+namespace Shader::IR {
+
+enum class Type {
+ Void = 0,
+ Opaque = 1 << 0,
+ Reg = 1 << 1,
+ Pred = 1 << 2,
+ Attribute = 1 << 3,
+ Patch = 1 << 4,
+ U1 = 1 << 5,
+ U8 = 1 << 6,
+ U16 = 1 << 7,
+ U32 = 1 << 8,
+ U64 = 1 << 9,
+ F16 = 1 << 10,
+ F32 = 1 << 11,
+ F64 = 1 << 12,
+ U32x2 = 1 << 13,
+ U32x3 = 1 << 14,
+ U32x4 = 1 << 15,
+ F16x2 = 1 << 16,
+ F16x3 = 1 << 17,
+ F16x4 = 1 << 18,
+ F32x2 = 1 << 19,
+ F32x3 = 1 << 20,
+ F32x4 = 1 << 21,
+ F64x2 = 1 << 22,
+ F64x3 = 1 << 23,
+ F64x4 = 1 << 24,
+};
+DECLARE_ENUM_FLAG_OPERATORS(Type)
+
+[[nodiscard]] std::string NameOf(Type type);
+
+[[nodiscard]] bool AreTypesCompatible(Type lhs, Type rhs) noexcept;
+
+} // namespace Shader::IR
+
+template <>
+struct fmt::formatter<Shader::IR::Type> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::IR::Type& type, FormatContext& ctx) {
+ return fmt::format_to(ctx.out(), "{}", NameOf(type));
+ }
+};
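Type is a bit set, which is what lets TypedValue (introduced later in this patch) express constraints such as "U32 or U64". A small runtime sketch, assuming type.cpp is linked in for NameOf:

#include <cassert>

#include "shader_recompiler/frontend/ir/type.h"

int main() {
    using namespace Shader::IR;
    // A mask describing "either a 32-bit or a 64-bit integer".
    const Type mask{Type::U32 | Type::U64};
    assert((mask & Type::U32) != Type::Void);
    assert(NameOf(mask) == "U32|U64");
    // Opaque values are compatible with anything; concrete types only with themselves.
    assert(AreTypesCompatible(Type::F32, Type::Opaque));
    assert(!AreTypesCompatible(Type::F32, Type::U32));
}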
diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp
new file mode 100644
index 000000000..d365ea1bc
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/value.cpp
@@ -0,0 +1,99 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/ir/opcodes.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::IR {
+
+Value::Value(IR::Inst* value) noexcept : type{Type::Opaque}, inst{value} {}
+
+Value::Value(IR::Reg value) noexcept : type{Type::Reg}, reg{value} {}
+
+Value::Value(IR::Pred value) noexcept : type{Type::Pred}, pred{value} {}
+
+Value::Value(IR::Attribute value) noexcept : type{Type::Attribute}, attribute{value} {}
+
+Value::Value(IR::Patch value) noexcept : type{Type::Patch}, patch{value} {}
+
+Value::Value(bool value) noexcept : type{Type::U1}, imm_u1{value} {}
+
+Value::Value(u8 value) noexcept : type{Type::U8}, imm_u8{value} {}
+
+Value::Value(u16 value) noexcept : type{Type::U16}, imm_u16{value} {}
+
+Value::Value(u32 value) noexcept : type{Type::U32}, imm_u32{value} {}
+
+Value::Value(f32 value) noexcept : type{Type::F32}, imm_f32{value} {}
+
+Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {}
+
+Value::Value(f64 value) noexcept : type{Type::F64}, imm_f64{value} {}
+
+IR::Type Value::Type() const noexcept {
+ if (IsPhi()) {
+ // The type of a phi node is stored in its flags
+ return inst->Flags<IR::Type>();
+ }
+ if (IsIdentity()) {
+ return inst->Arg(0).Type();
+ }
+ if (type == Type::Opaque) {
+ return inst->Type();
+ }
+ return type;
+}
+
+bool Value::operator==(const Value& other) const {
+ if (type != other.type) {
+ return false;
+ }
+ switch (type) {
+ case Type::Void:
+ return true;
+ case Type::Opaque:
+ return inst == other.inst;
+ case Type::Reg:
+ return reg == other.reg;
+ case Type::Pred:
+ return pred == other.pred;
+ case Type::Attribute:
+ return attribute == other.attribute;
+ case Type::Patch:
+ return patch == other.patch;
+ case Type::U1:
+ return imm_u1 == other.imm_u1;
+ case Type::U8:
+ return imm_u8 == other.imm_u8;
+ case Type::U16:
+ case Type::F16:
+ return imm_u16 == other.imm_u16;
+ case Type::U32:
+ case Type::F32:
+ return imm_u32 == other.imm_u32;
+ case Type::U64:
+ case Type::F64:
+ return imm_u64 == other.imm_u64;
+ case Type::U32x2:
+ case Type::U32x3:
+ case Type::U32x4:
+ case Type::F16x2:
+ case Type::F16x3:
+ case Type::F16x4:
+ case Type::F32x2:
+ case Type::F32x3:
+ case Type::F32x4:
+ case Type::F64x2:
+ case Type::F64x3:
+ case Type::F64x4:
+ break;
+ }
+ throw LogicError("Invalid type {}", type);
+}
+
+bool Value::operator!=(const Value& other) const {
+ return !operator==(other);
+}
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h
new file mode 100644
index 000000000..0c6bf684d
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/value.h
@@ -0,0 +1,398 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <cstring>
+#include <memory>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include <boost/container/small_vector.hpp>
+#include <boost/intrusive/list.hpp>
+
+#include "common/assert.h"
+#include "common/bit_cast.h"
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/attribute.h"
+#include "shader_recompiler/frontend/ir/opcodes.h"
+#include "shader_recompiler/frontend/ir/patch.h"
+#include "shader_recompiler/frontend/ir/pred.h"
+#include "shader_recompiler/frontend/ir/reg.h"
+#include "shader_recompiler/frontend/ir/type.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::IR {
+
+class Block;
+class Inst;
+
+struct AssociatedInsts;
+
+class Value {
+public:
+ Value() noexcept = default;
+ explicit Value(IR::Inst* value) noexcept;
+ explicit Value(IR::Reg value) noexcept;
+ explicit Value(IR::Pred value) noexcept;
+ explicit Value(IR::Attribute value) noexcept;
+ explicit Value(IR::Patch value) noexcept;
+ explicit Value(bool value) noexcept;
+ explicit Value(u8 value) noexcept;
+ explicit Value(u16 value) noexcept;
+ explicit Value(u32 value) noexcept;
+ explicit Value(f32 value) noexcept;
+ explicit Value(u64 value) noexcept;
+ explicit Value(f64 value) noexcept;
+
+ [[nodiscard]] bool IsIdentity() const noexcept;
+ [[nodiscard]] bool IsPhi() const noexcept;
+ [[nodiscard]] bool IsEmpty() const noexcept;
+ [[nodiscard]] bool IsImmediate() const noexcept;
+ [[nodiscard]] IR::Type Type() const noexcept;
+
+ [[nodiscard]] IR::Inst* Inst() const;
+ [[nodiscard]] IR::Inst* InstRecursive() const;
+ [[nodiscard]] IR::Value Resolve() const;
+ [[nodiscard]] IR::Reg Reg() const;
+ [[nodiscard]] IR::Pred Pred() const;
+ [[nodiscard]] IR::Attribute Attribute() const;
+ [[nodiscard]] IR::Patch Patch() const;
+ [[nodiscard]] bool U1() const;
+ [[nodiscard]] u8 U8() const;
+ [[nodiscard]] u16 U16() const;
+ [[nodiscard]] u32 U32() const;
+ [[nodiscard]] f32 F32() const;
+ [[nodiscard]] u64 U64() const;
+ [[nodiscard]] f64 F64() const;
+
+ [[nodiscard]] bool operator==(const Value& other) const;
+ [[nodiscard]] bool operator!=(const Value& other) const;
+
+private:
+ IR::Type type{};
+ union {
+ IR::Inst* inst{};
+ IR::Reg reg;
+ IR::Pred pred;
+ IR::Attribute attribute;
+ IR::Patch patch;
+ bool imm_u1;
+ u8 imm_u8;
+ u16 imm_u16;
+ u32 imm_u32;
+ f32 imm_f32;
+ u64 imm_u64;
+ f64 imm_f64;
+ };
+};
+static_assert(static_cast<u32>(IR::Type::Void) == 0, "memset relies on IR::Type::Void being zero");
+static_assert(std::is_trivially_copyable_v<Value>);
+
+template <IR::Type type_>
+class TypedValue : public Value {
+public:
+ TypedValue() = default;
+
+ template <IR::Type other_type>
+ requires((other_type & type_) != IR::Type::Void) explicit(false)
+ TypedValue(const TypedValue<other_type>& value)
+ : Value(value) {}
+
+ explicit TypedValue(const Value& value) : Value(value) {
+ if ((value.Type() & type_) == IR::Type::Void) {
+ throw InvalidArgument("Incompatible types {} and {}", type_, value.Type());
+ }
+ }
+
+ explicit TypedValue(IR::Inst* inst_) : TypedValue(Value(inst_)) {}
+};
+
+class Inst : public boost::intrusive::list_base_hook<> {
+public:
+ explicit Inst(IR::Opcode op_, u32 flags_) noexcept;
+ ~Inst();
+
+ Inst& operator=(const Inst&) = delete;
+ Inst(const Inst&) = delete;
+
+ Inst& operator=(Inst&&) = delete;
+ Inst(Inst&&) = delete;
+
+ /// Get the number of uses this instruction has.
+ [[nodiscard]] int UseCount() const noexcept {
+ return use_count;
+ }
+
+ /// Determines whether this instruction has uses or not.
+ [[nodiscard]] bool HasUses() const noexcept {
+ return use_count > 0;
+ }
+
+ /// Get the opcode this microinstruction represents.
+ [[nodiscard]] IR::Opcode GetOpcode() const noexcept {
+ return op;
+ }
+
+ /// Determines if there is a pseudo-operation associated with this instruction.
+ [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept {
+ return associated_insts != nullptr;
+ }
+
+ /// Determines whether or not this instruction may have side effects.
+ [[nodiscard]] bool MayHaveSideEffects() const noexcept;
+
+ /// Determines whether or not this instruction is a pseudo-instruction.
+ /// Pseudo-instructions depend on their parent instructions for their semantics.
+ [[nodiscard]] bool IsPseudoInstruction() const noexcept;
+
+ /// Determines if all arguments of this instruction are immediates.
+ [[nodiscard]] bool AreAllArgsImmediates() const;
+
+ /// Gets a pseudo-operation associated with this instruction
+ [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode);
+
+ /// Get the type this instruction returns.
+ [[nodiscard]] IR::Type Type() const;
+
+ /// Get the number of arguments this instruction has.
+ [[nodiscard]] size_t NumArgs() const {
+ return op == IR::Opcode::Phi ? phi_args.size() : NumArgsOf(op);
+ }
+
+ /// Get the value of a given argument index.
+ [[nodiscard]] Value Arg(size_t index) const noexcept {
+ if (op == IR::Opcode::Phi) {
+ return phi_args[index].second;
+ } else {
+ return args[index];
+ }
+ }
+
+ /// Set the value of a given argument index.
+ void SetArg(size_t index, Value value);
+
+ /// Get a pointer to the block of a phi argument.
+ [[nodiscard]] Block* PhiBlock(size_t index) const;
+ /// Add phi operand to a phi instruction.
+ void AddPhiOperand(Block* predecessor, const Value& value);
+
+ void Invalidate();
+ void ClearArgs();
+
+ void ReplaceUsesWith(Value replacement);
+
+ void ReplaceOpcode(IR::Opcode opcode);
+
+ template <typename FlagsType>
+ requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
+ [[nodiscard]] FlagsType Flags() const noexcept {
+ FlagsType ret;
+ std::memcpy(reinterpret_cast<char*>(&ret), &flags, sizeof(ret));
+ return ret;
+ }
+
+ template <typename FlagsType>
+ requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
+    void SetFlags(FlagsType value) noexcept {
+ std::memcpy(&flags, &value, sizeof(value));
+ }
+
+ /// Intrusively store the host definition of this instruction.
+ template <typename DefinitionType>
+ void SetDefinition(DefinitionType def) {
+ definition = Common::BitCast<u32>(def);
+ }
+
+ /// Return the intrusively stored host definition of this instruction.
+ template <typename DefinitionType>
+ [[nodiscard]] DefinitionType Definition() const noexcept {
+ return Common::BitCast<DefinitionType>(definition);
+ }
+
+ /// Destructively remove one reference count from the instruction
+ /// Useful for register allocation
+ void DestructiveRemoveUsage() {
+ --use_count;
+ }
+
+ /// Destructively add usages to the instruction
+ /// Useful for register allocation
+ void DestructiveAddUsage(int count) {
+ use_count += count;
+ }
+
+private:
+ struct NonTriviallyDummy {
+ NonTriviallyDummy() noexcept {}
+ };
+
+ void Use(const Value& value);
+ void UndoUse(const Value& value);
+
+ IR::Opcode op{};
+ int use_count{};
+ u32 flags{};
+ u32 definition{};
+ union {
+ NonTriviallyDummy dummy{};
+ boost::container::small_vector<std::pair<Block*, Value>, 2> phi_args;
+ std::array<Value, 5> args;
+ };
+ std::unique_ptr<AssociatedInsts> associated_insts;
+};
+static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased");
+
+struct AssociatedInsts {
+ union {
+ Inst* in_bounds_inst;
+ Inst* sparse_inst;
+ Inst* zero_inst{};
+ };
+ Inst* sign_inst{};
+ Inst* carry_inst{};
+ Inst* overflow_inst{};
+};
+
+using U1 = TypedValue<Type::U1>;
+using U8 = TypedValue<Type::U8>;
+using U16 = TypedValue<Type::U16>;
+using U32 = TypedValue<Type::U32>;
+using U64 = TypedValue<Type::U64>;
+using F16 = TypedValue<Type::F16>;
+using F32 = TypedValue<Type::F32>;
+using F64 = TypedValue<Type::F64>;
+using U32U64 = TypedValue<Type::U32 | Type::U64>;
+using F32F64 = TypedValue<Type::F32 | Type::F64>;
+using U16U32U64 = TypedValue<Type::U16 | Type::U32 | Type::U64>;
+using F16F32F64 = TypedValue<Type::F16 | Type::F32 | Type::F64>;
+using UAny = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64>;
+
+inline bool Value::IsIdentity() const noexcept {
+ return type == Type::Opaque && inst->GetOpcode() == Opcode::Identity;
+}
+
+inline bool Value::IsPhi() const noexcept {
+ return type == Type::Opaque && inst->GetOpcode() == Opcode::Phi;
+}
+
+inline bool Value::IsEmpty() const noexcept {
+ return type == Type::Void;
+}
+
+inline bool Value::IsImmediate() const noexcept {
+ IR::Type current_type{type};
+ const IR::Inst* current_inst{inst};
+ while (current_type == Type::Opaque && current_inst->GetOpcode() == Opcode::Identity) {
+ const Value& arg{current_inst->Arg(0)};
+ current_type = arg.type;
+ current_inst = arg.inst;
+ }
+ return current_type != Type::Opaque;
+}
+
+inline IR::Inst* Value::Inst() const {
+ DEBUG_ASSERT(type == Type::Opaque);
+ return inst;
+}
+
+inline IR::Inst* Value::InstRecursive() const {
+ DEBUG_ASSERT(type == Type::Opaque);
+ if (IsIdentity()) {
+ return inst->Arg(0).InstRecursive();
+ }
+ return inst;
+}
+
+inline IR::Value Value::Resolve() const {
+ if (IsIdentity()) {
+ return inst->Arg(0).Resolve();
+ }
+ return *this;
+}
+
+inline IR::Reg Value::Reg() const {
+ DEBUG_ASSERT(type == Type::Reg);
+ return reg;
+}
+
+inline IR::Pred Value::Pred() const {
+ DEBUG_ASSERT(type == Type::Pred);
+ return pred;
+}
+
+inline IR::Attribute Value::Attribute() const {
+ DEBUG_ASSERT(type == Type::Attribute);
+ return attribute;
+}
+
+inline IR::Patch Value::Patch() const {
+ DEBUG_ASSERT(type == Type::Patch);
+ return patch;
+}
+
+inline bool Value::U1() const {
+ if (IsIdentity()) {
+ return inst->Arg(0).U1();
+ }
+ DEBUG_ASSERT(type == Type::U1);
+ return imm_u1;
+}
+
+inline u8 Value::U8() const {
+ if (IsIdentity()) {
+ return inst->Arg(0).U8();
+ }
+ DEBUG_ASSERT(type == Type::U8);
+ return imm_u8;
+}
+
+inline u16 Value::U16() const {
+ if (IsIdentity()) {
+ return inst->Arg(0).U16();
+ }
+ DEBUG_ASSERT(type == Type::U16);
+ return imm_u16;
+}
+
+inline u32 Value::U32() const {
+ if (IsIdentity()) {
+ return inst->Arg(0).U32();
+ }
+ DEBUG_ASSERT(type == Type::U32);
+ return imm_u32;
+}
+
+inline f32 Value::F32() const {
+ if (IsIdentity()) {
+ return inst->Arg(0).F32();
+ }
+ DEBUG_ASSERT(type == Type::F32);
+ return imm_f32;
+}
+
+inline u64 Value::U64() const {
+ if (IsIdentity()) {
+ return inst->Arg(0).U64();
+ }
+ DEBUG_ASSERT(type == Type::U64);
+ return imm_u64;
+}
+
+inline f64 Value::F64() const {
+ if (IsIdentity()) {
+ return inst->Arg(0).F64();
+ }
+ DEBUG_ASSERT(type == Type::F64);
+ return imm_f64;
+}
+
+[[nodiscard]] inline bool IsPhi(const Inst& inst) {
+ return inst.GetOpcode() == Opcode::Phi;
+}
+
+} // namespace Shader::IR
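For illustration, a minimal sketch of how an optimization pass might drive the Value/Inst API above: Resolve() walks Identity chains, IsImmediate() and U32() read folded constants, and ReplaceUsesWith() rewrites every user. The pass placement, the IAdd32 opcode choice and the u32 immediate constructor of Value (declared earlier in this header) are assumptions made for the sketch, not something this commit defines here.

    // Hypothetical constant-folding helper, shown only to illustrate the API above.
    #include "shader_recompiler/frontend/ir/value.h"

    namespace Shader::Optimization {

    // Fold an integer add when both operands resolve to immediates.
    void FoldAddExample(IR::Inst& inst) {
        if (inst.GetOpcode() != IR::Opcode::IAdd32) {
            return;
        }
        const IR::Value lhs{inst.Arg(0).Resolve()}; // Resolve() walks Identity chains
        const IR::Value rhs{inst.Arg(1).Resolve()};
        if (!lhs.IsImmediate() || !rhs.IsImmediate()) {
            return;
        }
        // Every use of the instruction now sees the folded immediate.
        inst.ReplaceUsesWith(IR::Value{lhs.U32() + rhs.U32()});
    }

    } // namespace Shader::Optimization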
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
new file mode 100644
index 000000000..1a954a509
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
@@ -0,0 +1,642 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <optional>
+#include <string>
+#include <utility>
+
+#include <fmt/format.h>
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/frontend/maxwell/decode.h"
+#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h"
+#include "shader_recompiler/frontend/maxwell/location.h"
+
+namespace Shader::Maxwell::Flow {
+namespace {
+struct Compare {
+ bool operator()(const Block& lhs, Location rhs) const noexcept {
+ return lhs.begin < rhs;
+ }
+
+ bool operator()(Location lhs, const Block& rhs) const noexcept {
+ return lhs < rhs.begin;
+ }
+
+ bool operator()(const Block& lhs, const Block& rhs) const noexcept {
+ return lhs.begin < rhs.begin;
+ }
+};
+
+u32 BranchOffset(Location pc, Instruction inst) {
+ return pc.Offset() + static_cast<u32>(inst.branch.Offset()) + 8u;
+}
+
+void Split(Block* old_block, Block* new_block, Location pc) {
+ if (pc <= old_block->begin || pc >= old_block->end) {
+ throw InvalidArgument("Invalid address to split={}", pc);
+ }
+ *new_block = Block{};
+ new_block->begin = pc;
+ new_block->end = old_block->end;
+ new_block->end_class = old_block->end_class;
+ new_block->cond = old_block->cond;
+ new_block->stack = old_block->stack;
+ new_block->branch_true = old_block->branch_true;
+ new_block->branch_false = old_block->branch_false;
+ new_block->function_call = old_block->function_call;
+ new_block->return_block = old_block->return_block;
+ new_block->branch_reg = old_block->branch_reg;
+ new_block->branch_offset = old_block->branch_offset;
+ new_block->indirect_branches = std::move(old_block->indirect_branches);
+
+ const Location old_begin{old_block->begin};
+ Stack old_stack{std::move(old_block->stack)};
+ *old_block = Block{};
+ old_block->begin = old_begin;
+ old_block->end = pc;
+ old_block->end_class = EndClass::Branch;
+ old_block->cond = IR::Condition(true);
+ old_block->stack = old_stack;
+ old_block->branch_true = new_block;
+ old_block->branch_false = nullptr;
+}
+
+Token OpcodeToken(Opcode opcode) {
+ switch (opcode) {
+ case Opcode::PBK:
+ case Opcode::BRK:
+ return Token::PBK;
+ case Opcode::PCNT:
+ case Opcode::CONT:
+        return Token::PCNT;
+ case Opcode::PEXIT:
+ case Opcode::EXIT:
+ return Token::PEXIT;
+ case Opcode::PLONGJMP:
+ case Opcode::LONGJMP:
+ return Token::PLONGJMP;
+ case Opcode::PRET:
+ case Opcode::RET:
+ case Opcode::CAL:
+ return Token::PRET;
+ case Opcode::SSY:
+ case Opcode::SYNC:
+ return Token::SSY;
+ default:
+ throw InvalidArgument("{}", opcode);
+ }
+}
+
+bool IsAbsoluteJump(Opcode opcode) {
+ switch (opcode) {
+ case Opcode::JCAL:
+ case Opcode::JMP:
+ case Opcode::JMX:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool HasFlowTest(Opcode opcode) {
+ switch (opcode) {
+ case Opcode::BRA:
+ case Opcode::BRX:
+ case Opcode::EXIT:
+ case Opcode::JMP:
+ case Opcode::JMX:
+ case Opcode::KIL:
+ case Opcode::BRK:
+ case Opcode::CONT:
+ case Opcode::LONGJMP:
+ case Opcode::RET:
+ case Opcode::SYNC:
+ return true;
+ case Opcode::CAL:
+ case Opcode::JCAL:
+ return false;
+ default:
+ throw InvalidArgument("Invalid branch {}", opcode);
+ }
+}
+
+std::string NameOf(const Block& block) {
+ if (block.begin.IsVirtual()) {
+ return fmt::format("\"Virtual {}\"", block.begin);
+ } else {
+ return fmt::format("\"{}\"", block.begin);
+ }
+}
+} // Anonymous namespace
+
+void Stack::Push(Token token, Location target) {
+ entries.push_back({
+ .token = token,
+ .target{target},
+ });
+}
+
+std::pair<Location, Stack> Stack::Pop(Token token) const {
+ const std::optional<Location> pc{Peek(token)};
+ if (!pc) {
+ throw LogicError("Token could not be found");
+ }
+ return {*pc, Remove(token)};
+}
+
+std::optional<Location> Stack::Peek(Token token) const {
+ const auto it{std::find_if(entries.rbegin(), entries.rend(),
+ [token](const auto& entry) { return entry.token == token; })};
+ if (it == entries.rend()) {
+ return std::nullopt;
+ }
+ return it->target;
+}
+
+Stack Stack::Remove(Token token) const {
+ const auto it{std::find_if(entries.rbegin(), entries.rend(),
+ [token](const auto& entry) { return entry.token == token; })};
+ const auto pos{std::distance(entries.rbegin(), it)};
+ Stack result;
+ result.entries.insert(result.entries.end(), entries.begin(), entries.end() - pos - 1);
+ return result;
+}
+
+bool Block::Contains(Location pc) const noexcept {
+ return pc >= begin && pc < end;
+}
+
+Function::Function(ObjectPool<Block>& block_pool, Location start_address)
+ : entrypoint{start_address} {
+ Label& label{labels.emplace_back()};
+ label.address = start_address;
+ label.block = block_pool.Create(Block{});
+ label.block->begin = start_address;
+ label.block->end = start_address;
+ label.block->end_class = EndClass::Branch;
+ label.block->cond = IR::Condition(true);
+ label.block->branch_true = nullptr;
+ label.block->branch_false = nullptr;
+}
+
+CFG::CFG(Environment& env_, ObjectPool<Block>& block_pool_, Location start_address,
+ bool exits_to_dispatcher_)
+ : env{env_}, block_pool{block_pool_}, program_start{start_address}, exits_to_dispatcher{
+ exits_to_dispatcher_} {
+ if (exits_to_dispatcher) {
+ dispatch_block = block_pool.Create(Block{});
+ dispatch_block->begin = {};
+ dispatch_block->end = {};
+ dispatch_block->end_class = EndClass::Exit;
+ dispatch_block->cond = IR::Condition(true);
+ dispatch_block->stack = {};
+ dispatch_block->branch_true = nullptr;
+ dispatch_block->branch_false = nullptr;
+ }
+ functions.emplace_back(block_pool, start_address);
+ for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) {
+ while (!functions[function_id].labels.empty()) {
+ Function& function{functions[function_id]};
+ Label label{function.labels.back()};
+ function.labels.pop_back();
+ AnalyzeLabel(function_id, label);
+ }
+ }
+ if (exits_to_dispatcher) {
+ const auto last_block{functions[0].blocks.rbegin()};
+ dispatch_block->begin = last_block->end + 1;
+ dispatch_block->end = last_block->end + 1;
+ functions[0].blocks.insert(*dispatch_block);
+ }
+}
+
+void CFG::AnalyzeLabel(FunctionId function_id, Label& label) {
+ if (InspectVisitedBlocks(function_id, label)) {
+ // Label address has been visited
+ return;
+ }
+ // Try to find the next block
+ Function* const function{&functions[function_id]};
+ Location pc{label.address};
+ const auto next_it{function->blocks.upper_bound(pc, Compare{})};
+ const bool is_last{next_it == function->blocks.end()};
+ Block* const next{is_last ? nullptr : &*next_it};
+ // Insert before the next block
+ Block* const block{label.block};
+ // Analyze instructions until it reaches an already visited block or there's a branch
+ bool is_branch{false};
+ while (!next || pc < next->begin) {
+ is_branch = AnalyzeInst(block, function_id, pc) == AnalysisState::Branch;
+ if (is_branch) {
+ break;
+ }
+ ++pc;
+ }
+ if (!is_branch) {
+        // If the block finished without a branch, the next instruction has already
+        // been visited, so jump to it
+ block->end = pc;
+ block->cond = IR::Condition{true};
+ block->branch_true = next;
+ block->branch_false = nullptr;
+ }
+ // Function's pointer might be invalid, resolve it again
+ // Insert the new block
+ functions[function_id].blocks.insert(*block);
+}
+
+bool CFG::InspectVisitedBlocks(FunctionId function_id, const Label& label) {
+ const Location pc{label.address};
+ Function& function{functions[function_id]};
+ const auto it{
+ std::ranges::find_if(function.blocks, [pc](auto& block) { return block.Contains(pc); })};
+ if (it == function.blocks.end()) {
+ // Address has not been visited
+ return false;
+ }
+ Block* const visited_block{&*it};
+ if (visited_block->begin == pc) {
+ throw LogicError("Dangling block");
+ }
+ Block* const new_block{label.block};
+ Split(visited_block, new_block, pc);
+ function.blocks.insert(it, *new_block);
+ return true;
+}
+
+CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Location pc) {
+ const Instruction inst{env.ReadInstruction(pc.Offset())};
+ const Opcode opcode{Decode(inst.raw)};
+ switch (opcode) {
+ case Opcode::BRA:
+ case Opcode::JMP:
+ case Opcode::RET:
+ if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) {
+ return AnalysisState::Continue;
+ }
+ switch (opcode) {
+ case Opcode::BRA:
+ case Opcode::JMP:
+ AnalyzeBRA(block, function_id, pc, inst, IsAbsoluteJump(opcode));
+ break;
+ case Opcode::RET:
+ block->end_class = EndClass::Return;
+ break;
+ default:
+ break;
+ }
+ block->end = pc;
+ return AnalysisState::Branch;
+ case Opcode::BRK:
+ case Opcode::CONT:
+ case Opcode::LONGJMP:
+ case Opcode::SYNC: {
+ if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) {
+ return AnalysisState::Continue;
+ }
+ const auto [stack_pc, new_stack]{block->stack.Pop(OpcodeToken(opcode))};
+ block->branch_true = AddLabel(block, new_stack, stack_pc, function_id);
+ block->end = pc;
+ return AnalysisState::Branch;
+ }
+ case Opcode::KIL: {
+ const Predicate pred{inst.Pred()};
+ const auto ir_pred{static_cast<IR::Pred>(pred.index)};
+ const IR::Condition cond{inst.branch.flow_test, ir_pred, pred.negated};
+ AnalyzeCondInst(block, function_id, pc, EndClass::Kill, cond);
+ return AnalysisState::Branch;
+ }
+ case Opcode::PBK:
+ case Opcode::PCNT:
+ case Opcode::PEXIT:
+ case Opcode::PLONGJMP:
+ case Opcode::SSY:
+ block->stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst));
+ return AnalysisState::Continue;
+ case Opcode::BRX:
+ case Opcode::JMX:
+ return AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode), function_id);
+ case Opcode::EXIT:
+ return AnalyzeEXIT(block, function_id, pc, inst);
+ case Opcode::PRET:
+ throw NotImplementedException("PRET flow analysis");
+ case Opcode::CAL:
+ case Opcode::JCAL: {
+ const bool is_absolute{IsAbsoluteJump(opcode)};
+ const Location cal_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)};
+ // Technically CAL pushes into PRET, but that's implicit in the function call for us
+ // Insert the function into the list if it doesn't exist
+ const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)};
+ const bool exists{it != functions.end()};
+ const FunctionId call_id{exists ? static_cast<size_t>(std::distance(functions.begin(), it))
+ : functions.size()};
+ if (!exists) {
+ functions.emplace_back(block_pool, cal_pc);
+ }
+ block->end_class = EndClass::Call;
+ block->function_call = call_id;
+ block->return_block = AddLabel(block, block->stack, pc + 1, function_id);
+ block->end = pc;
+ return AnalysisState::Branch;
+ }
+ default:
+ break;
+ }
+ const Predicate pred{inst.Pred()};
+ if (pred == Predicate{true} || pred == Predicate{false}) {
+ return AnalysisState::Continue;
+ }
+ const IR::Condition cond{static_cast<IR::Pred>(pred.index), pred.negated};
+ AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond);
+ return AnalysisState::Branch;
+}
+
+void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc,
+ EndClass insn_end_class, IR::Condition cond) {
+ if (block->begin != pc) {
+ // If the block doesn't start in the conditional instruction
+ // mark it as a label to visit it later
+ block->end = pc;
+ block->cond = IR::Condition{true};
+ block->branch_true = AddLabel(block, block->stack, pc, function_id);
+ block->branch_false = nullptr;
+ return;
+ }
+ // Create a virtual block and a conditional block
+ Block* const conditional_block{block_pool.Create()};
+ Block virtual_block{};
+ virtual_block.begin = block->begin.Virtual();
+ virtual_block.end = block->begin.Virtual();
+ virtual_block.end_class = EndClass::Branch;
+ virtual_block.stack = block->stack;
+ virtual_block.cond = cond;
+ virtual_block.branch_true = conditional_block;
+ virtual_block.branch_false = nullptr;
+ // Save the contents of the visited block in the conditional block
+ *conditional_block = std::move(*block);
+ // Impersonate the visited block with a virtual block
+ *block = std::move(virtual_block);
+ // Set the end properties of the conditional instruction
+ conditional_block->end = pc + 1;
+ conditional_block->end_class = insn_end_class;
+ // Add a label to the instruction after the conditional instruction
+ Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)};
+ // Branch to the next instruction from the virtual block
+ block->branch_false = endif_block;
+ // And branch to it from the conditional instruction if it is a branch or a kill instruction
+ // Kill instructions are considered a branch because they demote to a helper invocation and
+ // execution may continue.
+ if (insn_end_class == EndClass::Branch || insn_end_class == EndClass::Kill) {
+ conditional_block->cond = IR::Condition{true};
+ conditional_block->branch_true = endif_block;
+ conditional_block->branch_false = nullptr;
+ }
+ // Finally insert the condition block into the list of blocks
+ functions[function_id].blocks.insert(*conditional_block);
+}
+
+bool CFG::AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst,
+ Opcode opcode) {
+ if (inst.branch.is_cbuf) {
+ throw NotImplementedException("Branch with constant buffer offset");
+ }
+ const Predicate pred{inst.Pred()};
+ if (pred == Predicate{false}) {
+ return false;
+ }
+ const bool has_flow_test{HasFlowTest(opcode)};
+ const IR::FlowTest flow_test{has_flow_test ? inst.branch.flow_test.Value() : IR::FlowTest::T};
+ if (pred != Predicate{true} || flow_test != IR::FlowTest::T) {
+ block->cond = IR::Condition(flow_test, static_cast<IR::Pred>(pred.index), pred.negated);
+ block->branch_false = AddLabel(block, block->stack, pc + 1, function_id);
+ } else {
+ block->cond = IR::Condition{true};
+ }
+ return true;
+}
+
+void CFG::AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst,
+ bool is_absolute) {
+ const Location bra_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)};
+ block->branch_true = AddLabel(block, block->stack, bra_pc, function_id);
+}
+
+CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute,
+ FunctionId function_id) {
+ const std::optional brx_table{TrackIndirectBranchTable(env, pc, program_start)};
+ if (!brx_table) {
+ TrackIndirectBranchTable(env, pc, program_start);
+ throw NotImplementedException("Failed to track indirect branch");
+ }
+ const IR::FlowTest flow_test{inst.branch.flow_test};
+ const Predicate pred{inst.Pred()};
+ if (flow_test != IR::FlowTest::T || pred != Predicate{true}) {
+ throw NotImplementedException("Conditional indirect branch");
+ }
+ std::vector<u32> targets;
+ targets.reserve(brx_table->num_entries);
+ for (u32 i = 0; i < brx_table->num_entries; ++i) {
+ u32 target{env.ReadCbufValue(brx_table->cbuf_index, brx_table->cbuf_offset + i * 4)};
+ if (!is_absolute) {
+ target += pc.Offset();
+ }
+ target += static_cast<u32>(brx_table->branch_offset);
+ target += 8;
+ targets.push_back(target);
+ }
+ std::ranges::sort(targets);
+ targets.erase(std::unique(targets.begin(), targets.end()), targets.end());
+
+ block->indirect_branches.reserve(targets.size());
+ for (const u32 target : targets) {
+ Block* const branch{AddLabel(block, block->stack, target, function_id)};
+ block->indirect_branches.push_back({
+ .block = branch,
+ .address = target,
+ });
+ }
+ block->cond = IR::Condition{true};
+ block->end = pc + 1;
+ block->end_class = EndClass::IndirectBranch;
+ block->branch_reg = brx_table->branch_reg;
+ block->branch_offset = brx_table->branch_offset + 8;
+ if (!is_absolute) {
+ block->branch_offset += pc.Offset();
+ }
+ return AnalysisState::Branch;
+}
+
+CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Location pc,
+ Instruction inst) {
+ const IR::FlowTest flow_test{inst.branch.flow_test};
+ const Predicate pred{inst.Pred()};
+ if (pred == Predicate{false} || flow_test == IR::FlowTest::F) {
+ // EXIT will never be taken
+ return AnalysisState::Continue;
+ }
+ if (exits_to_dispatcher && function_id != 0) {
+ throw NotImplementedException("Dispatch EXIT on external function");
+ }
+ if (pred != Predicate{true} || flow_test != IR::FlowTest::T) {
+ if (block->stack.Peek(Token::PEXIT).has_value()) {
+ throw NotImplementedException("Conditional EXIT with PEXIT token");
+ }
+ const IR::Condition cond{flow_test, static_cast<IR::Pred>(pred.index), pred.negated};
+ if (exits_to_dispatcher) {
+ block->end = pc;
+ block->end_class = EndClass::Branch;
+ block->cond = cond;
+ block->branch_true = dispatch_block;
+ block->branch_false = AddLabel(block, block->stack, pc + 1, function_id);
+ return AnalysisState::Branch;
+ }
+ AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond);
+ return AnalysisState::Branch;
+ }
+ if (const std::optional<Location> exit_pc{block->stack.Peek(Token::PEXIT)}) {
+ const Stack popped_stack{block->stack.Remove(Token::PEXIT)};
+ block->cond = IR::Condition{true};
+ block->branch_true = AddLabel(block, popped_stack, *exit_pc, function_id);
+ block->branch_false = nullptr;
+ return AnalysisState::Branch;
+ }
+ if (exits_to_dispatcher) {
+ block->cond = IR::Condition{true};
+ block->end = pc;
+ block->end_class = EndClass::Branch;
+ block->branch_true = dispatch_block;
+ block->branch_false = nullptr;
+ return AnalysisState::Branch;
+ }
+ block->end = pc + 1;
+ block->end_class = EndClass::Exit;
+ return AnalysisState::Branch;
+}
+
+Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id) {
+ Function& function{functions[function_id]};
+ if (block->begin == pc) {
+ // Jumps to itself
+ return block;
+ }
+ if (const auto it{function.blocks.find(pc, Compare{})}; it != function.blocks.end()) {
+ // Block already exists and it has been visited
+ if (function.blocks.begin() != it) {
+ // Check if the previous node is the virtual variant of the label
+ // This won't exist if a virtual node is not needed or it hasn't been visited
+            // If it hasn't been visited and a virtual node is needed, this still behaves
+            // as expected, because the node will be impersonated by its virtual node when
+            // it is visited.
+ const auto prev{std::prev(it)};
+ if (it->begin.Virtual() == prev->begin) {
+ return &*prev;
+ }
+ }
+ return &*it;
+ }
+    // Make sure we don't insert the same label twice
+ const auto label_it{std::ranges::find(function.labels, pc, &Label::address)};
+ if (label_it != function.labels.end()) {
+ return label_it->block;
+ }
+ Block* const new_block{block_pool.Create()};
+ new_block->begin = pc;
+ new_block->end = pc;
+ new_block->end_class = EndClass::Branch;
+ new_block->cond = IR::Condition(true);
+ new_block->stack = stack;
+ new_block->branch_true = nullptr;
+ new_block->branch_false = nullptr;
+ function.labels.push_back(Label{
+ .address{pc},
+ .block = new_block,
+ .stack{std::move(stack)},
+ });
+ return new_block;
+}
+
+std::string CFG::Dot() const {
+ int node_uid{0};
+
+ std::string dot{"digraph shader {\n"};
+ for (const Function& function : functions) {
+ dot += fmt::format("\tsubgraph cluster_{} {{\n", function.entrypoint);
+ dot += fmt::format("\t\tnode [style=filled];\n");
+ for (const Block& block : function.blocks) {
+ const std::string name{NameOf(block)};
+ const auto add_branch = [&](Block* branch, bool add_label) {
+ dot += fmt::format("\t\t{}->{}", name, NameOf(*branch));
+ if (add_label && block.cond != IR::Condition{true} &&
+ block.cond != IR::Condition{false}) {
+ dot += fmt::format(" [label=\"{}\"]", block.cond);
+ }
+ dot += '\n';
+ };
+ dot += fmt::format("\t\t{};\n", name);
+ switch (block.end_class) {
+ case EndClass::Branch:
+ if (block.cond != IR::Condition{false}) {
+ add_branch(block.branch_true, true);
+ }
+ if (block.cond != IR::Condition{true}) {
+ add_branch(block.branch_false, false);
+ }
+ break;
+ case EndClass::IndirectBranch:
+ for (const IndirectBranch& branch : block.indirect_branches) {
+ add_branch(branch.block, false);
+ }
+ break;
+ case EndClass::Call:
+ dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
+ dot += fmt::format("\t\tN{}->{};\n", node_uid, NameOf(*block.return_block));
+ dot += fmt::format("\t\tN{} [label=\"Call {}\"][shape=square][style=stripped];\n",
+ node_uid, block.function_call);
+ dot += '\n';
+ ++node_uid;
+ break;
+ case EndClass::Exit:
+ dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
+ dot += fmt::format("\t\tN{} [label=\"Exit\"][shape=square][style=stripped];\n",
+ node_uid);
+ ++node_uid;
+ break;
+ case EndClass::Return:
+ dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
+ dot += fmt::format("\t\tN{} [label=\"Return\"][shape=square][style=stripped];\n",
+ node_uid);
+ ++node_uid;
+ break;
+ case EndClass::Kill:
+ dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
+ dot += fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=stripped];\n",
+ node_uid);
+ ++node_uid;
+ break;
+ }
+ }
+ if (function.entrypoint == 8) {
+ dot += fmt::format("\t\tlabel = \"main\";\n");
+ } else {
+ dot += fmt::format("\t\tlabel = \"Function {}\";\n", function.entrypoint);
+ }
+ dot += "\t}\n";
+ }
+ if (!functions.empty()) {
+ auto& function{functions.front()};
+ if (function.blocks.empty()) {
+ dot += "Start;\n";
+ } else {
+ dot += fmt::format("\tStart -> {};\n", NameOf(*function.blocks.begin()));
+ }
+ dot += fmt::format("\tStart [shape=diamond];\n");
+ }
+ dot += "}\n";
+ return dot;
+}
+
+} // namespace Shader::Maxwell::Flow
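As a usage sketch (not part of the commit): a CFG is built from an Environment, an ObjectPool of flow blocks and a start Location, and its Dot() output can be fed straight to Graphviz. The helper name is made up, and the default-constructibility of ObjectPool is assumed.

    #include <cstdio>

    #include "common/common_types.h"
    #include "shader_recompiler/environment.h"
    #include "shader_recompiler/frontend/maxwell/control_flow.h"
    #include "shader_recompiler/object_pool.h"

    // Build the control flow graph of a shader and print it in Graphviz format.
    void DumpShaderCfg(Shader::Environment& env, u32 start_address) {
        Shader::ObjectPool<Shader::Maxwell::Flow::Block> block_pool;
        // start_address must be a multiple of 8, as enforced by Location's constructor.
        Shader::Maxwell::Flow::CFG cfg{env, block_pool, Shader::Maxwell::Location{start_address}};
        std::puts(cfg.Dot().c_str());
    }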
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h
new file mode 100644
index 000000000..a6bd3e196
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.h
@@ -0,0 +1,169 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <compare>
+#include <optional>
+#include <span>
+#include <string>
+#include <vector>
+
+#include <boost/container/small_vector.hpp>
+#include <boost/intrusive/set.hpp>
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/condition.h"
+#include "shader_recompiler/frontend/maxwell/instruction.h"
+#include "shader_recompiler/frontend/maxwell/location.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/object_pool.h"
+
+namespace Shader::Maxwell::Flow {
+
+struct Block;
+
+using FunctionId = size_t;
+
+enum class EndClass {
+ Branch,
+ IndirectBranch,
+ Call,
+ Exit,
+ Return,
+ Kill,
+};
+
+enum class Token {
+ SSY,
+ PBK,
+ PEXIT,
+ PRET,
+ PCNT,
+ PLONGJMP,
+};
+
+struct StackEntry {
+ auto operator<=>(const StackEntry&) const noexcept = default;
+
+ Token token;
+ Location target;
+};
+
+class Stack {
+public:
+ void Push(Token token, Location target);
+ [[nodiscard]] std::pair<Location, Stack> Pop(Token token) const;
+ [[nodiscard]] std::optional<Location> Peek(Token token) const;
+ [[nodiscard]] Stack Remove(Token token) const;
+
+private:
+ boost::container::small_vector<StackEntry, 3> entries;
+};
+
+struct IndirectBranch {
+ Block* block;
+ u32 address;
+};
+
+struct Block : boost::intrusive::set_base_hook<
+ // Normal link is ~2.5% faster compared to safe link
+ boost::intrusive::link_mode<boost::intrusive::normal_link>> {
+ [[nodiscard]] bool Contains(Location pc) const noexcept;
+
+ bool operator<(const Block& rhs) const noexcept {
+ return begin < rhs.begin;
+ }
+
+ Location begin;
+ Location end;
+ EndClass end_class{};
+ IR::Condition cond{};
+ Stack stack;
+ Block* branch_true{};
+ Block* branch_false{};
+ FunctionId function_call{};
+ Block* return_block{};
+ IR::Reg branch_reg{};
+ s32 branch_offset{};
+ std::vector<IndirectBranch> indirect_branches;
+};
+
+struct Label {
+ Location address;
+ Block* block;
+ Stack stack;
+};
+
+struct Function {
+ explicit Function(ObjectPool<Block>& block_pool, Location start_address);
+
+ Location entrypoint;
+ boost::container::small_vector<Label, 16> labels;
+ boost::intrusive::set<Block> blocks;
+};
+
+class CFG {
+ enum class AnalysisState {
+ Branch,
+ Continue,
+ };
+
+public:
+ explicit CFG(Environment& env, ObjectPool<Block>& block_pool, Location start_address,
+ bool exits_to_dispatcher = false);
+
+ CFG& operator=(const CFG&) = delete;
+ CFG(const CFG&) = delete;
+
+ CFG& operator=(CFG&&) = delete;
+ CFG(CFG&&) = delete;
+
+ [[nodiscard]] std::string Dot() const;
+
+ [[nodiscard]] std::span<const Function> Functions() const noexcept {
+ return std::span(functions.data(), functions.size());
+ }
+ [[nodiscard]] std::span<Function> Functions() noexcept {
+ return std::span(functions.data(), functions.size());
+ }
+
+ [[nodiscard]] bool ExitsToDispatcher() const {
+ return exits_to_dispatcher;
+ }
+
+private:
+ void AnalyzeLabel(FunctionId function_id, Label& label);
+
+ /// Inspect already visited blocks.
+ /// Return true when the block has already been visited
+ bool InspectVisitedBlocks(FunctionId function_id, const Label& label);
+
+ AnalysisState AnalyzeInst(Block* block, FunctionId function_id, Location pc);
+
+ void AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class,
+ IR::Condition cond);
+
+ /// Return true when the branch instruction is confirmed to be a branch
+ bool AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst,
+ Opcode opcode);
+
+ void AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst,
+ bool is_absolute);
+ AnalysisState AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute,
+ FunctionId function_id);
+ AnalysisState AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst);
+
+    /// Return the branch target block
+ Block* AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id);
+
+ Environment& env;
+ ObjectPool<Block>& block_pool;
+ boost::container::small_vector<Function, 1> functions;
+ Location program_start;
+ bool exits_to_dispatcher{};
+ Block* dispatch_block{};
+};
+
+} // namespace Shader::Maxwell::Flow
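Stack mirrors the hardware reconvergence stack: push-style opcodes (SSY, PBK, PCNT, ...) record a target, and the matching pop-style opcodes (SYNC, BRK, CONT, ...) consume the most recent entry with the same token. A small sketch of that pairing, with made-up addresses:

    using namespace Shader::Maxwell;
    using namespace Shader::Maxwell::Flow;

    void StackPairingExample() {
        Stack stack;
        stack.Push(Token::PBK, Location{0xc8}); // PBK: break target of an enclosing loop
        stack.Push(Token::SSY, Location{0x88}); // SSY: reconvergence point of a divergence
        // SYNC consumes the most recent SSY entry. Pop is non-mutating: it returns the
        // target plus a new Stack without that entry (or anything pushed after it),
        // which is how CFG::AnalyzeInst threads stacks between blocks.
        const auto [sync_target, remaining]{stack.Pop(Token::SSY)};
    }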
diff --git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp
new file mode 100644
index 000000000..972f677dc
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/decode.cpp
@@ -0,0 +1,149 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <bit>
+#include <memory>
+#include <string_view>
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/decode.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+
+namespace Shader::Maxwell {
+namespace {
+struct MaskValue {
+ u64 mask;
+ u64 value;
+};
+
+constexpr MaskValue MaskValueFromEncoding(const char* encoding) {
+ u64 mask{};
+ u64 value{};
+ u64 bit{u64(1) << 63};
+ while (*encoding) {
+ switch (*encoding) {
+ case '0':
+ mask |= bit;
+ break;
+ case '1':
+ mask |= bit;
+ value |= bit;
+ break;
+ case '-':
+ break;
+ case ' ':
+ break;
+ default:
+ throw LogicError("Invalid encoding character '{}'", *encoding);
+ }
+ ++encoding;
+ if (*encoding != ' ') {
+ bit >>= 1;
+ }
+ }
+ return MaskValue{.mask = mask, .value = value};
+}
+
+struct InstEncoding {
+ MaskValue mask_value;
+ Opcode opcode;
+};
+constexpr std::array UNORDERED_ENCODINGS{
+#define INST(name, cute, encode) \
+ InstEncoding{ \
+ .mask_value{MaskValueFromEncoding(encode)}, \
+ .opcode = Opcode::name, \
+ },
+#include "maxwell.inc"
+#undef INST
+};
+
+constexpr auto SortedEncodings() {
+ std::array encodings{UNORDERED_ENCODINGS};
+ std::ranges::sort(encodings, [](const InstEncoding& lhs, const InstEncoding& rhs) {
+ return std::popcount(lhs.mask_value.mask) > std::popcount(rhs.mask_value.mask);
+ });
+ return encodings;
+}
+constexpr auto ENCODINGS{SortedEncodings()};
+
+constexpr int WidestLeftBits() {
+ int bits{64};
+ for (const InstEncoding& encoding : ENCODINGS) {
+ bits = std::min(bits, std::countr_zero(encoding.mask_value.mask));
+ }
+ return 64 - bits;
+}
+constexpr int WIDEST_LEFT_BITS{WidestLeftBits()};
+constexpr int MASK_SHIFT{64 - WIDEST_LEFT_BITS};
+
+constexpr size_t ToFastLookupIndex(u64 value) {
+ return static_cast<size_t>(value >> MASK_SHIFT);
+}
+
+constexpr size_t FastLookupSize() {
+ size_t max_width{};
+ for (const InstEncoding& encoding : ENCODINGS) {
+ max_width = std::max(max_width, ToFastLookupIndex(encoding.mask_value.mask));
+ }
+ return max_width + 1;
+}
+constexpr size_t FAST_LOOKUP_SIZE{FastLookupSize()};
+
+struct InstInfo {
+ [[nodiscard]] u64 Mask() const noexcept {
+ return static_cast<u64>(high_mask) << MASK_SHIFT;
+ }
+
+ [[nodiscard]] u64 Value() const noexcept {
+ return static_cast<u64>(high_value) << MASK_SHIFT;
+ }
+
+ u16 high_mask;
+ u16 high_value;
+ Opcode opcode;
+};
+
+constexpr auto MakeFastLookupTableIndex(size_t index) {
+ std::array<InstInfo, 2> encodings{};
+ size_t element{};
+ for (const auto& encoding : ENCODINGS) {
+ const size_t mask{ToFastLookupIndex(encoding.mask_value.mask)};
+ const size_t value{ToFastLookupIndex(encoding.mask_value.value)};
+ if ((index & mask) == value) {
+ encodings.at(element) = InstInfo{
+ .high_mask = static_cast<u16>(encoding.mask_value.mask >> MASK_SHIFT),
+ .high_value = static_cast<u16>(encoding.mask_value.value >> MASK_SHIFT),
+ .opcode = encoding.opcode,
+ };
+ ++element;
+ }
+ }
+ return encodings;
+}
+
+/*constexpr*/ auto MakeFastLookupTable() {
+ auto encodings{std::make_unique<std::array<std::array<InstInfo, 2>, FAST_LOOKUP_SIZE>>()};
+ for (size_t index = 0; index < FAST_LOOKUP_SIZE; ++index) {
+ (*encodings)[index] = MakeFastLookupTableIndex(index);
+ }
+ return encodings;
+}
+const auto FAST_LOOKUP_TABLE{MakeFastLookupTable()};
+} // Anonymous namespace
+
+Opcode Decode(u64 insn) {
+ const auto& table{(*FAST_LOOKUP_TABLE)[ToFastLookupIndex(insn)]};
+ const auto it{std::ranges::find_if(
+ table, [insn](const InstInfo& info) { return (insn & info.Mask()) == info.Value(); })};
+ if (it == table.end()) {
+ throw NotImplementedException("Instruction 0x{:016x} is unknown / unimplemented", insn);
+ }
+ return it->opcode;
+}
+
+} // namespace Shader::Maxwell
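Decode is a two-level lookup: the top WIDEST_LEFT_BITS bits of the instruction index a table of at most two candidate encodings, and each candidate is then checked with the usual mask/value test. For illustration only, this is what that test looks like for the BRA pattern "1110 0010 0100 ----" from maxwell.inc, expanded by hand; the constants are derived from that string and are not new definitions in this commit.

    #include <cstdint>

    // "1110 0010 0100 ----": the first 12 bits are fixed, the rest are don't-care.
    constexpr std::uint64_t BRA_MASK{0xFFF0'0000'0000'0000};  // bits constrained by the pattern
    constexpr std::uint64_t BRA_VALUE{0xE240'0000'0000'0000}; // required values of those bits

    constexpr bool IsBraEncoding(std::uint64_t insn) {
        return (insn & BRA_MASK) == BRA_VALUE;
    }
    static_assert(IsBraEncoding(0xE240'0000'0007'000F)); // operand bits do not affect the match
    static_assert(!IsBraEncoding(0xE250'0000'0000'0000));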
diff --git a/src/shader_recompiler/frontend/maxwell/decode.h b/src/shader_recompiler/frontend/maxwell/decode.h
new file mode 100644
index 000000000..b4f080fd7
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/decode.h
@@ -0,0 +1,14 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+
+namespace Shader::Maxwell {
+
+[[nodiscard]] Opcode Decode(u64 insn);
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp
new file mode 100644
index 000000000..008625cb3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp
@@ -0,0 +1,108 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/decode.h"
+#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h"
+
+namespace Shader::Maxwell {
+namespace {
+union Encoding {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<20, 19, u64> immediate;
+ BitField<56, 1, u64> is_negative;
+ BitField<20, 24, s64> brx_offset;
+};
+
+template <typename Callable>
+std::optional<u64> Track(Environment& env, Location block_begin, Location& pos, Callable&& func) {
+ while (pos >= block_begin) {
+ const u64 insn{env.ReadInstruction(pos.Offset())};
+ --pos;
+ if (func(insn, Decode(insn))) {
+ return insn;
+ }
+ }
+ return std::nullopt;
+}
+
+std::optional<u64> TrackLDC(Environment& env, Location block_begin, Location& pos,
+ IR::Reg brx_reg) {
+ return Track(env, block_begin, pos, [brx_reg](u64 insn, Opcode opcode) {
+ const LDC::Encoding ldc{insn};
+ return opcode == Opcode::LDC && ldc.dest_reg == brx_reg && ldc.size == LDC::Size::B32 &&
+ ldc.mode == LDC::Mode::Default;
+ });
+}
+
+std::optional<u64> TrackSHL(Environment& env, Location block_begin, Location& pos,
+ IR::Reg ldc_reg) {
+ return Track(env, block_begin, pos, [ldc_reg](u64 insn, Opcode opcode) {
+ const Encoding shl{insn};
+ return opcode == Opcode::SHL_imm && shl.dest_reg == ldc_reg;
+ });
+}
+
+std::optional<u64> TrackIMNMX(Environment& env, Location block_begin, Location& pos,
+ IR::Reg shl_reg) {
+ return Track(env, block_begin, pos, [shl_reg](u64 insn, Opcode opcode) {
+ const Encoding imnmx{insn};
+ return opcode == Opcode::IMNMX_imm && imnmx.dest_reg == shl_reg;
+ });
+}
+} // Anonymous namespace
+
+std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env, Location brx_pos,
+ Location block_begin) {
+ const u64 brx_insn{env.ReadInstruction(brx_pos.Offset())};
+ const Opcode brx_opcode{Decode(brx_insn)};
+ if (brx_opcode != Opcode::BRX && brx_opcode != Opcode::JMX) {
+ throw LogicError("Tracked instruction is not BRX or JMX");
+ }
+ const IR::Reg brx_reg{Encoding{brx_insn}.src_reg};
+ const s32 brx_offset{static_cast<s32>(Encoding{brx_insn}.brx_offset)};
+
+ Location pos{brx_pos};
+ const std::optional<u64> ldc_insn{TrackLDC(env, block_begin, pos, brx_reg)};
+ if (!ldc_insn) {
+ return std::nullopt;
+ }
+ const LDC::Encoding ldc{*ldc_insn};
+ const u32 cbuf_index{static_cast<u32>(ldc.index)};
+ const u32 cbuf_offset{static_cast<u32>(static_cast<s32>(ldc.offset.Value()))};
+ const IR::Reg ldc_reg{ldc.src_reg};
+
+ const std::optional<u64> shl_insn{TrackSHL(env, block_begin, pos, ldc_reg)};
+ if (!shl_insn) {
+ return std::nullopt;
+ }
+ const Encoding shl{*shl_insn};
+ const IR::Reg shl_reg{shl.src_reg};
+
+ const std::optional<u64> imnmx_insn{TrackIMNMX(env, block_begin, pos, shl_reg)};
+ if (!imnmx_insn) {
+ return std::nullopt;
+ }
+ const Encoding imnmx{*imnmx_insn};
+ if (imnmx.is_negative != 0) {
+ return std::nullopt;
+ }
+ const u32 imnmx_immediate{static_cast<u32>(imnmx.immediate.Value())};
+ return IndirectBranchTableInfo{
+ .cbuf_index = cbuf_index,
+ .cbuf_offset = cbuf_offset,
+ .num_entries = imnmx_immediate + 1,
+ .branch_offset = brx_offset,
+ .branch_reg = brx_reg,
+ };
+}
+
+} // namespace Shader::Maxwell
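The tracker walks backwards from a BRX/JMX looking for the sequence compilers emit for jump tables: an IMNMX that clamps the table index, a SHL that scales it, and an LDC that loads the branch target from a constant buffer. A sketch of how the resulting IndirectBranchTableInfo is turned into concrete targets, mirroring CFG::AnalyzeBRX; the helper itself is illustrative, not part of this commit.

    #include <vector>

    #include "common/common_types.h"
    #include "shader_recompiler/environment.h"
    #include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h"

    std::vector<u32> ExpandBrxTargets(Shader::Environment& env,
                                      const Shader::Maxwell::IndirectBranchTableInfo& info,
                                      u32 brx_pc, bool is_absolute) {
        std::vector<u32> targets;
        targets.reserve(info.num_entries);
        for (u32 i = 0; i < info.num_entries; ++i) {
            // Each table entry is a 32-bit word in the tracked constant buffer.
            u32 target{env.ReadCbufValue(info.cbuf_index, info.cbuf_offset + i * 4)};
            if (!is_absolute) {
                target += brx_pc; // relative BRX targets are biased by the branch pc
            }
            target += static_cast<u32>(info.branch_offset) + 8; // fixed +8 of Maxwell branches
            targets.push_back(target);
        }
        return targets;
    }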
diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h
new file mode 100644
index 000000000..eee5102fa
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h
@@ -0,0 +1,28 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/reg.h"
+#include "shader_recompiler/frontend/maxwell/location.h"
+
+namespace Shader::Maxwell {
+
+struct IndirectBranchTableInfo {
+ u32 cbuf_index{};
+ u32 cbuf_offset{};
+ u32 num_entries{};
+ s32 branch_offset{};
+ IR::Reg branch_reg{};
+};
+
+std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env, Location brx_pos,
+ Location block_begin);
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/instruction.h b/src/shader_recompiler/frontend/maxwell/instruction.h
new file mode 100644
index 000000000..743d68d61
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/instruction.h
@@ -0,0 +1,63 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/flow_test.h"
+#include "shader_recompiler/frontend/ir/reg.h"
+
+namespace Shader::Maxwell {
+
+struct Predicate {
+ Predicate() = default;
+ Predicate(unsigned index_, bool negated_ = false) : index{index_}, negated{negated_} {}
+ Predicate(bool value) : index{7}, negated{!value} {}
+ Predicate(u64 raw) : index{static_cast<unsigned>(raw & 7)}, negated{(raw & 8) != 0} {}
+
+ unsigned index;
+ bool negated;
+};
+
+inline bool operator==(const Predicate& lhs, const Predicate& rhs) noexcept {
+ return lhs.index == rhs.index && lhs.negated == rhs.negated;
+}
+
+inline bool operator!=(const Predicate& lhs, const Predicate& rhs) noexcept {
+ return !(lhs == rhs);
+}
+
+union Instruction {
+ Instruction(u64 raw_) : raw{raw_} {}
+
+ u64 raw;
+
+ union {
+ BitField<5, 1, u64> is_cbuf;
+ BitField<0, 5, IR::FlowTest> flow_test;
+
+ [[nodiscard]] u32 Absolute() const noexcept {
+ return static_cast<u32>(absolute);
+ }
+
+ [[nodiscard]] s32 Offset() const noexcept {
+ return static_cast<s32>(offset);
+ }
+
+ private:
+ BitField<20, 24, s64> offset;
+ BitField<20, 32, u64> absolute;
+ } branch;
+
+ [[nodiscard]] Predicate Pred() const noexcept {
+ return Predicate{pred};
+ }
+
+private:
+ BitField<16, 4, u64> pred;
+};
+static_assert(std::is_trivially_copyable_v<Instruction>);
+
+} // namespace Shader::Maxwell
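For illustration, reading the guard predicate and the branch fields out of a raw instruction word through this union; the helper and the raw value are made up.

    using Shader::Maxwell::Instruction;
    using Shader::Maxwell::Predicate;

    void InspectInstructionExample(u64 raw) {
        const Instruction inst{raw};
        const Predicate pred{inst.Pred()}; // guard predicate; index 7 is PT (always true)
        if (pred == Predicate{false}) {
            return; // @!PT: statically never executed
        }
        const s32 relative{inst.branch.Offset()};   // sign-extended 24-bit relative offset
        const u32 absolute{inst.branch.Absolute()}; // 32-bit absolute target (JMP/JMX/JCAL)
        (void)relative;
        (void)absolute;
    }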
diff --git a/src/shader_recompiler/frontend/maxwell/location.h b/src/shader_recompiler/frontend/maxwell/location.h
new file mode 100644
index 000000000..26d29eae2
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/location.h
@@ -0,0 +1,112 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <compare>
+#include <iterator>
+
+#include <fmt/format.h>
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+
+namespace Shader::Maxwell {
+
+class Location {
+ static constexpr u32 VIRTUAL_BIAS{4};
+
+public:
+ constexpr Location() = default;
+
+ constexpr Location(u32 initial_offset) : offset{initial_offset} {
+ if (initial_offset % 8 != 0) {
+ throw InvalidArgument("initial_offset={} is not a multiple of 8", initial_offset);
+ }
+ Align();
+ }
+
+ constexpr Location Virtual() const noexcept {
+ Location virtual_location;
+ virtual_location.offset = offset - VIRTUAL_BIAS;
+ return virtual_location;
+ }
+
+ [[nodiscard]] constexpr u32 Offset() const noexcept {
+ return offset;
+ }
+
+ [[nodiscard]] constexpr bool IsVirtual() const {
+ return offset % 8 == VIRTUAL_BIAS;
+ }
+
+ constexpr auto operator<=>(const Location&) const noexcept = default;
+
+ constexpr Location operator++() noexcept {
+ const Location copy{*this};
+ Step();
+ return copy;
+ }
+
+ constexpr Location operator++(int) noexcept {
+ Step();
+ return *this;
+ }
+
+ constexpr Location operator--() noexcept {
+ const Location copy{*this};
+ Back();
+ return copy;
+ }
+
+ constexpr Location operator--(int) noexcept {
+ Back();
+ return *this;
+ }
+
+ constexpr Location operator+(int number) const {
+ Location new_pc{*this};
+ while (number > 0) {
+ --number;
+ ++new_pc;
+ }
+ while (number < 0) {
+ ++number;
+ --new_pc;
+ }
+ return new_pc;
+ }
+
+ constexpr Location operator-(int number) const {
+ return operator+(-number);
+ }
+
+private:
+ constexpr void Align() {
+ offset += offset % 32 == 0 ? 8 : 0;
+ }
+
+ constexpr void Step() {
+ offset += 8 + (offset % 32 == 24 ? 8 : 0);
+ }
+
+ constexpr void Back() {
+ offset -= 8 + (offset % 32 == 8 ? 8 : 0);
+ }
+
+ u32 offset{0xcccccccc};
+};
+
+} // namespace Shader::Maxwell
+
+template <>
+struct fmt::formatter<Shader::Maxwell::Location> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::Maxwell::Location& location, FormatContext& ctx) {
+ return fmt::format_to(ctx.out(), "{:04x}", location.Offset());
+ }
+};
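Maxwell packs instructions in 32-byte bundles: a scheduling control word in the first 8-byte slot followed by three instructions, which is why Align() skips offsets that are multiples of 32 and Step()/Back() jump over that slot. A short sketch of the resulting walk; the offsets in the comments follow from the arithmetic above.

    using Shader::Maxwell::Location;

    void LocationSteppingExample() {
        Location pc{0};              // aligns to 0x08, the first real instruction slot
        ++pc;                        // 0x10
        ++pc;                        // 0x18
        ++pc;                        // 0x28: the control word at 0x20 is skipped
        const Location prev{pc - 1}; // back to 0x18, again skipping 0x20
        (void)prev;
    }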
diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc
new file mode 100644
index 000000000..2fee591bb
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc
@@ -0,0 +1,286 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+INST(AL2P, "AL2P", "1110 1111 1010 0---")
+INST(ALD, "ALD", "1110 1111 1101 1---")
+INST(AST, "AST", "1110 1111 1111 0---")
+INST(ATOM_cas, "ATOM (cas)", "1110 1110 1111 ----")
+INST(ATOM, "ATOM", "1110 1101 ---- ----")
+INST(ATOMS_cas, "ATOMS (cas)", "1110 1110 ---- ----")
+INST(ATOMS, "ATOMS", "1110 1100 ---- ----")
+INST(B2R, "B2R", "1111 0000 1011 1---")
+INST(BAR, "BAR", "1111 0000 1010 1---")
+INST(BFE_reg, "BFE (reg)", "0101 1100 0000 0---")
+INST(BFE_cbuf, "BFE (cbuf)", "0100 1100 0000 0---")
+INST(BFE_imm, "BFE (imm)", "0011 100- 0000 0---")
+INST(BFI_reg, "BFI (reg)", "0101 1011 1111 0---")
+INST(BFI_rc, "BFI (rc)", "0101 0011 1111 0---")
+INST(BFI_cr, "BFI (cr)", "0100 1011 1111 0---")
+INST(BFI_imm, "BFI (imm)", "0011 011- 1111 0---")
+INST(BPT, "BPT", "1110 0011 1010 ----")
+INST(BRA, "BRA", "1110 0010 0100 ----")
+INST(BRK, "BRK", "1110 0011 0100 ----")
+INST(BRX, "BRX", "1110 0010 0101 ----")
+INST(CAL, "CAL", "1110 0010 0110 ----")
+INST(CCTL, "CCTL", "1110 1111 011- ----")
+INST(CCTLL, "CCTLL", "1110 1111 100- ----")
+INST(CONT, "CONT", "1110 0011 0101 ----")
+INST(CS2R, "CS2R", "0101 0000 1100 1---")
+INST(CSET, "CSET", "0101 0000 1001 1---")
+INST(CSETP, "CSETP", "0101 0000 1010 0---")
+INST(DADD_reg, "DADD (reg)", "0101 1100 0111 0---")
+INST(DADD_cbuf, "DADD (cbuf)", "0100 1100 0111 0---")
+INST(DADD_imm, "DADD (imm)", "0011 100- 0111 0---")
+INST(DEPBAR, "DEPBAR", "1111 0000 1111 0---")
+INST(DFMA_reg, "DFMA (reg)", "0101 1011 0111 ----")
+INST(DFMA_rc, "DFMA (rc)", "0101 0011 0111 ----")
+INST(DFMA_cr, "DFMA (cr)", "0100 1011 0111 ----")
+INST(DFMA_imm, "DFMA (imm)", "0011 011- 0111 ----")
+INST(DMNMX_reg, "DMNMX (reg)", "0101 1100 0101 0---")
+INST(DMNMX_cbuf, "DMNMX (cbuf)", "0100 1100 0101 0---")
+INST(DMNMX_imm, "DMNMX (imm)", "0011 100- 0101 0---")
+INST(DMUL_reg, "DMUL (reg)", "0101 1100 1000 0---")
+INST(DMUL_cbuf, "DMUL (cbuf)", "0100 1100 1000 0---")
+INST(DMUL_imm, "DMUL (imm)", "0011 100- 1000 0---")
+INST(DSET_reg, "DSET (reg)", "0101 1001 0--- ----")
+INST(DSET_cbuf, "DSET (cbuf)", "0100 1001 0--- ----")
+INST(DSET_imm, "DSET (imm)", "0011 001- 0--- ----")
+INST(DSETP_reg, "DSETP (reg)", "0101 1011 1000 ----")
+INST(DSETP_cbuf, "DSETP (cbuf)", "0100 1011 1000 ----")
+INST(DSETP_imm, "DSETP (imm)", "0011 011- 1000 ----")
+INST(EXIT, "EXIT", "1110 0011 0000 ----")
+INST(F2F_reg, "F2F (reg)", "0101 1100 1010 1---")
+INST(F2F_cbuf, "F2F (cbuf)", "0100 1100 1010 1---")
+INST(F2F_imm, "F2F (imm)", "0011 100- 1010 1---")
+INST(F2I_reg, "F2I (reg)", "0101 1100 1011 0---")
+INST(F2I_cbuf, "F2I (cbuf)", "0100 1100 1011 0---")
+INST(F2I_imm, "F2I (imm)", "0011 100- 1011 0---")
+INST(FADD_reg, "FADD (reg)", "0101 1100 0101 1---")
+INST(FADD_cbuf, "FADD (cbuf)", "0100 1100 0101 1---")
+INST(FADD_imm, "FADD (imm)", "0011 100- 0101 1---")
+INST(FADD32I, "FADD32I", "0000 10-- ---- ----")
+INST(FCHK_reg, "FCHK (reg)", "0101 1100 1000 1---")
+INST(FCHK_cbuf, "FCHK (cbuf)", "0100 1100 1000 1---")
+INST(FCHK_imm, "FCHK (imm)", "0011 100- 1000 1---")
+INST(FCMP_reg, "FCMP (reg)", "0101 1011 1010 ----")
+INST(FCMP_rc, "FCMP (rc)", "0101 0011 1010 ----")
+INST(FCMP_cr, "FCMP (cr)", "0100 1011 1010 ----")
+INST(FCMP_imm, "FCMP (imm)", "0011 011- 1010 ----")
+INST(FFMA_reg, "FFMA (reg)", "0101 1001 1--- ----")
+INST(FFMA_rc, "FFMA (rc)", "0101 0001 1--- ----")
+INST(FFMA_cr, "FFMA (cr)", "0100 1001 1--- ----")
+INST(FFMA_imm, "FFMA (imm)", "0011 001- 1--- ----")
+INST(FFMA32I, "FFMA32I", "0000 11-- ---- ----")
+INST(FLO_reg, "FLO (reg)", "0101 1100 0011 0---")
+INST(FLO_cbuf, "FLO (cbuf)", "0100 1100 0011 0---")
+INST(FLO_imm, "FLO (imm)", "0011 100- 0011 0---")
+INST(FMNMX_reg, "FMNMX (reg)", "0101 1100 0110 0---")
+INST(FMNMX_cbuf, "FMNMX (cbuf)", "0100 1100 0110 0---")
+INST(FMNMX_imm, "FMNMX (imm)", "0011 100- 0110 0---")
+INST(FMUL_reg, "FMUL (reg)", "0101 1100 0110 1---")
+INST(FMUL_cbuf, "FMUL (cbuf)", "0100 1100 0110 1---")
+INST(FMUL_imm, "FMUL (imm)", "0011 100- 0110 1---")
+INST(FMUL32I, "FMUL32I", "0001 1110 ---- ----")
+INST(FSET_reg, "FSET (reg)", "0101 1000 ---- ----")
+INST(FSET_cbuf, "FSET (cbuf)", "0100 1000 ---- ----")
+INST(FSET_imm, "FSET (imm)", "0011 000- ---- ----")
+INST(FSETP_reg, "FSETP (reg)", "0101 1011 1011 ----")
+INST(FSETP_cbuf, "FSETP (cbuf)", "0100 1011 1011 ----")
+INST(FSETP_imm, "FSETP (imm)", "0011 011- 1011 ----")
+INST(FSWZADD, "FSWZADD", "0101 0000 1111 1---")
+INST(GETCRSPTR, "GETCRSPTR", "1110 0010 1100 ----")
+INST(GETLMEMBASE, "GETLMEMBASE", "1110 0010 1101 ----")
+INST(HADD2_reg, "HADD2 (reg)", "0101 1101 0001 0---")
+INST(HADD2_cbuf, "HADD2 (cbuf)", "0111 101- 1--- ----")
+INST(HADD2_imm, "HADD2 (imm)", "0111 101- 0--- ----")
+INST(HADD2_32I, "HADD2_32I", "0010 110- ---- ----")
+INST(HFMA2_reg, "HFMA2 (reg)", "0101 1101 0000 0---")
+INST(HFMA2_rc, "HFMA2 (rc)", "0110 0--- 1--- ----")
+INST(HFMA2_cr, "HFMA2 (cr)", "0111 0--- 1--- ----")
+INST(HFMA2_imm, "HFMA2 (imm)", "0111 0--- 0--- ----")
+INST(HFMA2_32I, "HFMA2_32I", "0010 100- ---- ----")
+INST(HMUL2_reg, "HMUL2 (reg)", "0101 1101 0000 1---")
+INST(HMUL2_cbuf, "HMUL2 (cbuf)", "0111 100- 1--- ----")
+INST(HMUL2_imm, "HMUL2 (imm)", "0111 100- 0--- ----")
+INST(HMUL2_32I, "HMUL2_32I", "0010 101- ---- ----")
+INST(HSET2_reg, "HSET2 (reg)", "0101 1101 0001 1---")
+INST(HSET2_cbuf, "HSET2 (cbuf)", "0111 110- 1--- ----")
+INST(HSET2_imm, "HSET2 (imm)", "0111 110- 0--- ----")
+INST(HSETP2_reg, "HSETP2 (reg)", "0101 1101 0010 0---")
+INST(HSETP2_cbuf, "HSETP2 (cbuf)", "0111 111- 1--- ----")
+INST(HSETP2_imm, "HSETP2 (imm)", "0111 111- 0--- ----")
+INST(I2F_reg, "I2F (reg)", "0101 1100 1011 1---")
+INST(I2F_cbuf, "I2F (cbuf)", "0100 1100 1011 1---")
+INST(I2F_imm, "I2F (imm)", "0011 100- 1011 1---")
+INST(I2I_reg, "I2I (reg)", "0101 1100 1110 0---")
+INST(I2I_cbuf, "I2I (cbuf)", "0100 1100 1110 0---")
+INST(I2I_imm, "I2I (imm)", "0011 100- 1110 0---")
+INST(IADD_reg, "IADD (reg)", "0101 1100 0001 0---")
+INST(IADD_cbuf, "IADD (cbuf)", "0100 1100 0001 0---")
+INST(IADD_imm, "IADD (imm)", "0011 100- 0001 0---")
+INST(IADD3_reg, "IADD3 (reg)", "0101 1100 1100 ----")
+INST(IADD3_cbuf, "IADD3 (cbuf)", "0100 1100 1100 ----")
+INST(IADD3_imm, "IADD3 (imm)", "0011 100- 1100 ----")
+INST(IADD32I, "IADD32I", "0001 110- ---- ----")
+INST(ICMP_reg, "ICMP (reg)", "0101 1011 0100 ----")
+INST(ICMP_rc, "ICMP (rc)", "0101 0011 0100 ----")
+INST(ICMP_cr, "ICMP (cr)", "0100 1011 0100 ----")
+INST(ICMP_imm, "ICMP (imm)", "0011 011- 0100 ----")
+INST(IDE, "IDE", "1110 0011 1001 ----")
+INST(IDP_reg, "IDP (reg)", "0101 0011 1111 1---")
+INST(IDP_imm, "IDP (imm)", "0101 0011 1101 1---")
+INST(IMAD_reg, "IMAD (reg)", "0101 1010 0--- ----")
+INST(IMAD_rc, "IMAD (rc)", "0101 0010 0--- ----")
+INST(IMAD_cr, "IMAD (cr)", "0100 1010 0--- ----")
+INST(IMAD_imm, "IMAD (imm)", "0011 010- 0--- ----")
+INST(IMAD32I, "IMAD32I", "1000 00-- ---- ----")
+INST(IMADSP_reg, "IMADSP (reg)", "0101 1010 1--- ----")
+INST(IMADSP_rc, "IMADSP (rc)", "0101 0010 1--- ----")
+INST(IMADSP_cr, "IMADSP (cr)", "0100 1010 1--- ----")
+INST(IMADSP_imm, "IMADSP (imm)", "0011 010- 1--- ----")
+INST(IMNMX_reg, "IMNMX (reg)", "0101 1100 0010 0---")
+INST(IMNMX_cbuf, "IMNMX (cbuf)", "0100 1100 0010 0---")
+INST(IMNMX_imm, "IMNMX (imm)", "0011 100- 0010 0---")
+INST(IMUL_reg, "IMUL (reg)", "0101 1100 0011 1---")
+INST(IMUL_cbuf, "IMUL (cbuf)", "0100 1100 0011 1---")
+INST(IMUL_imm, "IMUL (imm)", "0011 100- 0011 1---")
+INST(IMUL32I, "IMUL32I", "0001 1111 ---- ----")
+INST(IPA, "IPA", "1110 0000 ---- ----")
+INST(ISBERD, "ISBERD", "1110 1111 1101 0---")
+INST(ISCADD_reg, "ISCADD (reg)", "0101 1100 0001 1---")
+INST(ISCADD_cbuf, "ISCADD (cbuf)", "0100 1100 0001 1---")
+INST(ISCADD_imm, "ISCADD (imm)", "0011 100- 0001 1---")
+INST(ISCADD32I, "ISCADD32I", "0001 01-- ---- ----")
+INST(ISET_reg, "ISET (reg)", "0101 1011 0101 ----")
+INST(ISET_cbuf, "ISET (cbuf)", "0100 1011 0101 ----")
+INST(ISET_imm, "ISET (imm)", "0011 011- 0101 ----")
+INST(ISETP_reg, "ISETP (reg)", "0101 1011 0110 ----")
+INST(ISETP_cbuf, "ISETP (cbuf)", "0100 1011 0110 ----")
+INST(ISETP_imm, "ISETP (imm)", "0011 011- 0110 ----")
+INST(JCAL, "JCAL", "1110 0010 0010 ----")
+INST(JMP, "JMP", "1110 0010 0001 ----")
+INST(JMX, "JMX", "1110 0010 0000 ----")
+INST(KIL, "KIL", "1110 0011 0011 ----")
+INST(LD, "LD", "100- ---- ---- ----")
+INST(LDC, "LDC", "1110 1111 1001 0---")
+INST(LDG, "LDG", "1110 1110 1101 0---")
+INST(LDL, "LDL", "1110 1111 0100 0---")
+INST(LDS, "LDS", "1110 1111 0100 1---")
+INST(LEA_hi_reg, "LEA (hi reg)", "0101 1011 1101 1---")
+INST(LEA_hi_cbuf, "LEA (hi cbuf)", "0001 10-- ---- ----")
+INST(LEA_lo_reg, "LEA (lo reg)", "0101 1011 1101 0---")
+INST(LEA_lo_cbuf, "LEA (lo cbuf)", "0100 1011 1101 ----")
+INST(LEA_lo_imm, "LEA (lo imm)", "0011 011- 1101 0---")
+INST(LEPC, "LEPC", "0101 0000 1101 0---")
+INST(LONGJMP, "LONGJMP", "1110 0011 0001 ----")
+INST(LOP_reg, "LOP (reg)", "0101 1100 0100 0---")
+INST(LOP_cbuf, "LOP (cbuf)", "0100 1100 0100 0---")
+INST(LOP_imm, "LOP (imm)", "0011 100- 0100 0---")
+INST(LOP3_reg, "LOP3 (reg)", "0101 1011 1110 0---")
+INST(LOP3_cbuf, "LOP3 (cbuf)", "0000 001- ---- ----")
+INST(LOP3_imm, "LOP3 (imm)", "0011 11-- ---- ----")
+INST(LOP32I, "LOP32I", "0000 01-- ---- ----")
+INST(MEMBAR, "MEMBAR", "1110 1111 1001 1---")
+INST(MOV_reg, "MOV (reg)", "0101 1100 1001 1---")
+INST(MOV_cbuf, "MOV (cbuf)", "0100 1100 1001 1---")
+INST(MOV_imm, "MOV (imm)", "0011 100- 1001 1---")
+INST(MOV32I, "MOV32I", "0000 0001 0000 ----")
+INST(MUFU, "MUFU", "0101 0000 1000 0---")
+INST(NOP, "NOP", "0101 0000 1011 0---")
+INST(OUT_reg, "OUT (reg)", "1111 1011 1110 0---")
+INST(OUT_cbuf, "OUT (cbuf)", "1110 1011 1110 0---")
+INST(OUT_imm, "OUT (imm)", "1111 011- 1110 0---")
+INST(P2R_reg, "P2R (reg)", "0101 1100 1110 1---")
+INST(P2R_cbuf, "P2R (cbuf)", "0100 1100 1110 1---")
+INST(P2R_imm, "P2R (imm)", "0011 1000 1110 1---")
+INST(PBK, "PBK", "1110 0010 1010 ----")
+INST(PCNT, "PCNT", "1110 0010 1011 ----")
+INST(PEXIT, "PEXIT", "1110 0010 0011 ----")
+INST(PIXLD, "PIXLD", "1110 1111 1110 1---")
+INST(PLONGJMP, "PLONGJMP", "1110 0010 1000 ----")
+INST(POPC_reg, "POPC (reg)", "0101 1100 0000 1---")
+INST(POPC_cbuf, "POPC (cbuf)", "0100 1100 0000 1---")
+INST(POPC_imm, "POPC (imm)", "0011 100- 0000 1---")
+INST(PRET, "PRET", "1110 0010 0111 ----")
+INST(PRMT_reg, "PRMT (reg)", "0101 1011 1100 ----")
+INST(PRMT_rc, "PRMT (rc)", "0101 0011 1100 ----")
+INST(PRMT_cr, "PRMT (cr)", "0100 1011 1100 ----")
+INST(PRMT_imm, "PRMT (imm)", "0011 011- 1100 ----")
+INST(PSET, "PSET", "0101 0000 1000 1---")
+INST(PSETP, "PSETP", "0101 0000 1001 0---")
+INST(R2B, "R2B", "1111 0000 1100 0---")
+INST(R2P_reg, "R2P (reg)", "0101 1100 1111 0---")
+INST(R2P_cbuf, "R2P (cbuf)", "0100 1100 1111 0---")
+INST(R2P_imm, "R2P (imm)", "0011 100- 1111 0---")
+INST(RAM, "RAM", "1110 0011 1000 ----")
+INST(RED, "RED", "1110 1011 1111 1---")
+INST(RET, "RET", "1110 0011 0010 ----")
+INST(RRO_reg, "RRO (reg)", "0101 1100 1001 0---")
+INST(RRO_cbuf, "RRO (cbuf)", "0100 1100 1001 0---")
+INST(RRO_imm, "RRO (imm)", "0011 100- 1001 0---")
+INST(RTT, "RTT", "1110 0011 0110 ----")
+INST(S2R, "S2R", "1111 0000 1100 1---")
+INST(SAM, "SAM", "1110 0011 0111 ----")
+INST(SEL_reg, "SEL (reg)", "0101 1100 1010 0---")
+INST(SEL_cbuf, "SEL (cbuf)", "0100 1100 1010 0---")
+INST(SEL_imm, "SEL (imm)", "0011 100- 1010 0---")
+INST(SETCRSPTR, "SETCRSPTR", "1110 0010 1110 ----")
+INST(SETLMEMBASE, "SETLMEMBASE", "1110 0010 1111 ----")
+INST(SHF_l_reg, "SHF (l reg)", "0101 1011 1111 1---")
+INST(SHF_l_imm, "SHF (l imm)", "0011 011- 1111 1---")
+INST(SHF_r_reg, "SHF (r reg)", "0101 1100 1111 1---")
+INST(SHF_r_imm, "SHF (r imm)", "0011 100- 1111 1---")
+INST(SHFL, "SHFL", "1110 1111 0001 0---")
+INST(SHL_reg, "SHL (reg)", "0101 1100 0100 1---")
+INST(SHL_cbuf, "SHL (cbuf)", "0100 1100 0100 1---")
+INST(SHL_imm, "SHL (imm)", "0011 100- 0100 1---")
+INST(SHR_reg, "SHR (reg)", "0101 1100 0010 1---")
+INST(SHR_cbuf, "SHR (cbuf)", "0100 1100 0010 1---")
+INST(SHR_imm, "SHR (imm)", "0011 100- 0010 1---")
+INST(SSY, "SSY", "1110 0010 1001 ----")
+INST(ST, "ST", "101- ---- ---- ----")
+INST(STG, "STG", "1110 1110 1101 1---")
+INST(STL, "STL", "1110 1111 0101 0---")
+INST(STP, "STP", "1110 1110 1010 0---")
+INST(STS, "STS", "1110 1111 0101 1---")
+INST(SUATOM, "SUATOM", "1110 1010 0--- ----")
+INST(SUATOM_cas, "SUATOM_cas", "1110 1010 1--- ----")
+INST(SULD, "SULD", "1110 1011 000- ----")
+INST(SURED, "SURED", "1110 1011 010- ----")
+INST(SUST, "SUST", "1110 1011 001- ----")
+INST(SYNC, "SYNC", "1111 0000 1111 1---")
+INST(TEX, "TEX", "1100 0--- ---- ----")
+INST(TEX_b, "TEX (b)", "1101 1110 10-- ----")
+INST(TEXS, "TEXS", "1101 -00- ---- ----")
+INST(TLD, "TLD", "1101 1100 ---- ----")
+INST(TLD_b, "TLD (b)", "1101 1101 ---- ----")
+INST(TLD4, "TLD4", "1100 10-- ---- ----")
+INST(TLD4_b, "TLD4 (b)", "1101 1110 11-- ----")
+INST(TLD4S, "TLD4S", "1101 1111 -0-- ----")
+INST(TLDS, "TLDS", "1101 -01- ---- ----")
+INST(TMML, "TMML", "1101 1111 0101 1---")
+INST(TMML_b, "TMML (b)", "1101 1111 0110 0---")
+INST(TXA, "TXA", "1101 1111 0100 0---")
+INST(TXD, "TXD", "1101 1110 00-- ----")
+INST(TXD_b, "TXD (b)", "1101 1110 01-- ----")
+INST(TXQ, "TXQ", "1101 1111 0100 1---")
+INST(TXQ_b, "TXQ (b)", "1101 1111 0101 0---")
+INST(VABSDIFF, "VABSDIFF", "0101 0100 ---- ----")
+INST(VABSDIFF4, "VABSDIFF4", "0101 0000 0--- ----")
+INST(VADD, "VADD", "0010 00-- ---- ----")
+INST(VMAD, "VMAD", "0101 1111 ---- ----")
+INST(VMNMX, "VMNMX", "0011 101- ---- ----")
+INST(VOTE, "VOTE", "0101 0000 1101 1---")
+INST(VOTE_vtg, "VOTE (vtg)", "0101 0000 1110 0---")
+INST(VSET, "VSET", "0100 000- ---- ----")
+INST(VSETP, "VSETP", "0101 0000 1111 0---")
+INST(VSHL, "VSHL", "0101 0111 ---- ----")
+INST(VSHR, "VSHR", "0101 0110 ---- ----")
+INST(XMAD_reg, "XMAD (reg)", "0101 1011 00-- ----")
+INST(XMAD_rc, "XMAD (rc)", "0101 0001 0--- ----")
+INST(XMAD_cr, "XMAD (cr)", "0100 111- ---- ----")
+INST(XMAD_imm, "XMAD (imm)", "0011 011- 00-- ----")
+
+// Removed because its irregular encoding makes the fast decode tables larger
+// INST(CCTLT, "CCTLT", "1110 1011 1111 0--0")
diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.cpp b/src/shader_recompiler/frontend/maxwell/opcodes.cpp
new file mode 100644
index 000000000..ccc40c20c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/opcodes.cpp
@@ -0,0 +1,26 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+
+namespace Shader::Maxwell {
+namespace {
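+// Expand the maxwell.inc X-macro's display-name column into a table parallel to
+// the Opcode enum so NameOf can index it directly.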
+constexpr std::array NAME_TABLE{
+#define INST(name, cute, encode) cute,
+#include "maxwell.inc"
+#undef INST
+};
+} // Anonymous namespace
+
+const char* NameOf(Opcode opcode) {
+ if (static_cast<size_t>(opcode) >= NAME_TABLE.size()) {
+ throw InvalidArgument("Invalid opcode with raw value {}", static_cast<int>(opcode));
+ }
+ return NAME_TABLE[static_cast<size_t>(opcode)];
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.h b/src/shader_recompiler/frontend/maxwell/opcodes.h
new file mode 100644
index 000000000..cd574f29d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/opcodes.h
@@ -0,0 +1,30 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <fmt/format.h>
+
+namespace Shader::Maxwell {
+
+enum class Opcode {
+#define INST(name, cute, encode) name,
+#include "maxwell.inc"
+#undef INST
+};
+
+const char* NameOf(Opcode opcode);
+
+} // namespace Shader::Maxwell
+
+template <>
+struct fmt::formatter<Shader::Maxwell::Opcode> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::Maxwell::Opcode& opcode, FormatContext& ctx) {
+ return format_to(ctx.out(), "{}", NameOf(opcode));
+ }
+};
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
new file mode 100644
index 000000000..8b3e0a15c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
@@ -0,0 +1,883 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+#include <version>
+
+#include <fmt/format.h>
+
+#include <boost/intrusive/list.hpp>
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/maxwell/decode.h"
+#include "shader_recompiler/frontend/maxwell/structured_control_flow.h"
+#include "shader_recompiler/frontend/maxwell/translate/translate.h"
+#include "shader_recompiler/object_pool.h"
+
+namespace Shader::Maxwell {
+namespace {
+struct Statement;
+
+// Use normal_link because we are not guaranteed to destroy the tree in order
+using ListBaseHook =
+ boost::intrusive::list_base_hook<boost::intrusive::link_mode<boost::intrusive::normal_link>>;
+
+using Tree = boost::intrusive::list<Statement,
+ // Allow using Statement without a definition
+ boost::intrusive::base_hook<ListBaseHook>,
+ // Avoid linear complexity on splice, size is never called
+ boost::intrusive::constant_time_size<false>>;
+using Node = Tree::iterator;
+
+enum class StatementType {
+ Code,
+ Goto,
+ Label,
+ If,
+ Loop,
+ Break,
+ Return,
+ Kill,
+ Unreachable,
+ Function,
+ Identity,
+ Not,
+ Or,
+ SetVariable,
+ SetIndirectBranchVariable,
+ Variable,
+ IndirectBranchCond,
+};
+
+bool HasChildren(StatementType type) {
+ switch (type) {
+ case StatementType::If:
+ case StatementType::Loop:
+ case StatementType::Function:
+ return true;
+ default:
+ return false;
+ }
+}
+
+struct Goto {};
+struct Label {};
+struct If {};
+struct Loop {};
+struct Break {};
+struct Return {};
+struct Kill {};
+struct Unreachable {};
+struct FunctionTag {};
+struct Identity {};
+struct Not {};
+struct Or {};
+struct SetVariable {};
+struct SetIndirectBranchVariable {};
+struct Variable {};
+struct IndirectBranchCond {};
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 26495) // Always initialize a member variable, expected in Statement
+#endif
+struct Statement : ListBaseHook {
+ Statement(const Flow::Block* block_, Statement* up_)
+ : block{block_}, up{up_}, type{StatementType::Code} {}
+ Statement(Goto, Statement* cond_, Node label_, Statement* up_)
+ : label{label_}, cond{cond_}, up{up_}, type{StatementType::Goto} {}
+ Statement(Label, u32 id_, Statement* up_) : id{id_}, up{up_}, type{StatementType::Label} {}
+ Statement(If, Statement* cond_, Tree&& children_, Statement* up_)
+ : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::If} {}
+ Statement(Loop, Statement* cond_, Tree&& children_, Statement* up_)
+ : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::Loop} {}
+ Statement(Break, Statement* cond_, Statement* up_)
+ : cond{cond_}, up{up_}, type{StatementType::Break} {}
+ Statement(Return, Statement* up_) : up{up_}, type{StatementType::Return} {}
+ Statement(Kill, Statement* up_) : up{up_}, type{StatementType::Kill} {}
+ Statement(Unreachable, Statement* up_) : up{up_}, type{StatementType::Unreachable} {}
+ Statement(FunctionTag) : children{}, type{StatementType::Function} {}
+ Statement(Identity, IR::Condition cond_, Statement* up_)
+ : guest_cond{cond_}, up{up_}, type{StatementType::Identity} {}
+ Statement(Not, Statement* op_, Statement* up_) : op{op_}, up{up_}, type{StatementType::Not} {}
+ Statement(Or, Statement* op_a_, Statement* op_b_, Statement* up_)
+ : op_a{op_a_}, op_b{op_b_}, up{up_}, type{StatementType::Or} {}
+ Statement(SetVariable, u32 id_, Statement* op_, Statement* up_)
+ : op{op_}, id{id_}, up{up_}, type{StatementType::SetVariable} {}
+ Statement(SetIndirectBranchVariable, IR::Reg branch_reg_, s32 branch_offset_, Statement* up_)
+ : branch_offset{branch_offset_},
+ branch_reg{branch_reg_}, up{up_}, type{StatementType::SetIndirectBranchVariable} {}
+ Statement(Variable, u32 id_, Statement* up_)
+ : id{id_}, up{up_}, type{StatementType::Variable} {}
+ Statement(IndirectBranchCond, u32 location_, Statement* up_)
+ : location{location_}, up{up_}, type{StatementType::IndirectBranchCond} {}
+
+ ~Statement() {
+ if (HasChildren(type)) {
+ std::destroy_at(&children);
+ }
+ }
+
+ union {
+ const Flow::Block* block;
+ Node label;
+ Tree children;
+ IR::Condition guest_cond;
+ Statement* op;
+ Statement* op_a;
+ u32 location;
+ s32 branch_offset;
+ };
+ union {
+ Statement* cond;
+ Statement* op_b;
+ u32 id;
+ IR::Reg branch_reg;
+ };
+ Statement* up{};
+ StatementType type;
+};
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+std::string DumpExpr(const Statement* stmt) {
+ switch (stmt->type) {
+ case StatementType::Identity:
+ return fmt::format("{}", stmt->guest_cond);
+ case StatementType::Not:
+ return fmt::format("!{}", DumpExpr(stmt->op));
+ case StatementType::Or:
+ return fmt::format("{} || {}", DumpExpr(stmt->op_a), DumpExpr(stmt->op_b));
+ case StatementType::Variable:
+ return fmt::format("goto_L{}", stmt->id);
+ case StatementType::IndirectBranchCond:
+ return fmt::format("(indirect_branch == {:x})", stmt->location);
+ default:
+ return "<invalid type>";
+ }
+}
+
+[[maybe_unused]] std::string DumpTree(const Tree& tree, u32 indentation = 0) {
+ std::string ret;
+ std::string indent(indentation, ' ');
+ for (auto stmt = tree.begin(); stmt != tree.end(); ++stmt) {
+ switch (stmt->type) {
+ case StatementType::Code:
+ ret += fmt::format("{} Block {:04x} -> {:04x} (0x{:016x});\n", indent,
+ stmt->block->begin.Offset(), stmt->block->end.Offset(),
+ reinterpret_cast<uintptr_t>(stmt->block));
+ break;
+ case StatementType::Goto:
+ ret += fmt::format("{} if ({}) goto L{};\n", indent, DumpExpr(stmt->cond),
+ stmt->label->id);
+ break;
+ case StatementType::Label:
+ ret += fmt::format("{}L{}:\n", indent, stmt->id);
+ break;
+ case StatementType::If:
+ ret += fmt::format("{} if ({}) {{\n", indent, DumpExpr(stmt->cond));
+ ret += DumpTree(stmt->children, indentation + 4);
+ ret += fmt::format("{} }}\n", indent);
+ break;
+ case StatementType::Loop:
+ ret += fmt::format("{} do {{\n", indent);
+ ret += DumpTree(stmt->children, indentation + 4);
+ ret += fmt::format("{} }} while ({});\n", indent, DumpExpr(stmt->cond));
+ break;
+ case StatementType::Break:
+ ret += fmt::format("{} if ({}) break;\n", indent, DumpExpr(stmt->cond));
+ break;
+ case StatementType::Return:
+ ret += fmt::format("{} return;\n", indent);
+ break;
+ case StatementType::Kill:
+ ret += fmt::format("{} kill;\n", indent);
+ break;
+ case StatementType::Unreachable:
+ ret += fmt::format("{} unreachable;\n", indent);
+ break;
+ case StatementType::SetVariable:
+ ret += fmt::format("{} goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op));
+ break;
+ case StatementType::SetIndirectBranchVariable:
+ ret += fmt::format("{} indirect_branch = {} + {};\n", indent, stmt->branch_reg,
+ stmt->branch_offset);
+ break;
+ case StatementType::Function:
+ case StatementType::Identity:
+ case StatementType::Not:
+ case StatementType::Or:
+ case StatementType::Variable:
+ case StatementType::IndirectBranchCond:
+ throw LogicError("Statement can't be printed");
+ }
+ }
+ return ret;
+}
+
+void SanitizeNoBreaks(const Tree& tree) {
+ if (std::ranges::find(tree, StatementType::Break, &Statement::type) != tree.end()) {
+ throw NotImplementedException("Capturing statement with break nodes");
+ }
+}
+
+size_t Level(Node stmt) {
+ size_t level{0};
+ Statement* node{stmt->up};
+ while (node) {
+ ++level;
+ node = node->up;
+ }
+ return level;
+}
+
+bool IsDirectlyRelated(Node goto_stmt, Node label_stmt) {
+ const size_t goto_level{Level(goto_stmt)};
+ const size_t label_level{Level(label_stmt)};
+ size_t min_level;
+ size_t max_level;
+ Node min;
+ Node max;
+ if (label_level < goto_level) {
+ min_level = label_level;
+ max_level = goto_level;
+ min = label_stmt;
+ max = goto_stmt;
+ } else { // goto_level < label_level
+ min_level = goto_level;
+ max_level = label_level;
+ min = goto_stmt;
+ max = label_stmt;
+ }
+ while (max_level > min_level) {
+ --max_level;
+ max = max->up;
+ }
+ return min->up == max->up;
+}
+
+bool IsIndirectlyRelated(Node goto_stmt, Node label_stmt) {
+ return goto_stmt->up != label_stmt->up && !IsDirectlyRelated(goto_stmt, label_stmt);
+}
+
+[[maybe_unused]] bool AreSiblings(Node goto_stmt, Node label_stmt) noexcept {
+ Node it{goto_stmt};
+ do {
+ if (it == label_stmt) {
+ return true;
+ }
+ --it;
+ } while (it != goto_stmt->up->children.begin());
+ while (it != goto_stmt->up->children.end()) {
+ if (it == label_stmt) {
+ return true;
+ }
+ ++it;
+ }
+ return false;
+}
+
+Node SiblingFromNephew(Node uncle, Node nephew) noexcept {
+ Statement* const parent{uncle->up};
+ Statement* it{&*nephew};
+ while (it->up != parent) {
+ it = it->up;
+ }
+ return Tree::s_iterator_to(*it);
+}
+
+bool AreOrdered(Node left_sibling, Node right_sibling) noexcept {
+ const Node end{right_sibling->up->children.end()};
+ for (auto it = right_sibling; it != end; ++it) {
+ if (it == left_sibling) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool NeedsLift(Node goto_stmt, Node label_stmt) noexcept {
+ const Node sibling{SiblingFromNephew(goto_stmt, label_stmt)};
+ return AreOrdered(sibling, goto_stmt);
+}
+
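+// Removes every goto from the statement tree: each goto is moved outward, moved
+// inward or lifted until it becomes a sibling of its label, and the pair is then
+// replaced with structured if/do-while constructs. This broadly follows the
+// classic goto-elimination approach of Erosa and Hendren ("Taming Control Flow").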
+class GotoPass {
+public:
+ explicit GotoPass(Flow::CFG& cfg, ObjectPool<Statement>& stmt_pool) : pool{stmt_pool} {
+ std::vector gotos{BuildTree(cfg)};
+ const auto end{gotos.rend()};
+ for (auto goto_stmt = gotos.rbegin(); goto_stmt != end; ++goto_stmt) {
+ RemoveGoto(*goto_stmt);
+ }
+ }
+
+ Statement& RootStatement() noexcept {
+ return root_stmt;
+ }
+
+private:
+ void RemoveGoto(Node goto_stmt) {
+ // Force goto_stmt and label_stmt to be directly related
+ const Node label_stmt{goto_stmt->label};
+ if (IsIndirectlyRelated(goto_stmt, label_stmt)) {
+ // Move goto_stmt out using outward-movement transformation until it becomes
+ // directly related to label_stmt
+ while (!IsDirectlyRelated(goto_stmt, label_stmt)) {
+ goto_stmt = MoveOutward(goto_stmt);
+ }
+ }
+ // Force goto_stmt and label_stmt to be siblings
+ if (IsDirectlyRelated(goto_stmt, label_stmt)) {
+ const size_t label_level{Level(label_stmt)};
+ size_t goto_level{Level(goto_stmt)};
+ if (goto_level > label_level) {
+ // Move goto_stmt out of its level using outward-movement transformations
+ while (goto_level > label_level) {
+ goto_stmt = MoveOutward(goto_stmt);
+ --goto_level;
+ }
+ } else { // Level(goto_stmt) < Level(label_stmt)
+ if (NeedsLift(goto_stmt, label_stmt)) {
+ // Lift goto_stmt to above stmt containing label_stmt using goto-lifting
+ // transformations
+ goto_stmt = Lift(goto_stmt);
+ }
+ // Move goto_stmt into label_stmt's level using inward-movement transformation
+ while (goto_level < label_level) {
+ goto_stmt = MoveInward(goto_stmt);
+ ++goto_level;
+ }
+ }
+ }
+ // Expensive operation:
+ // if (!AreSiblings(goto_stmt, label_stmt)) {
+ // throw LogicError("Goto is not a sibling with the label");
+ // }
+ // goto_stmt and label_stmt are guaranteed to be siblings, eliminate
+ if (std::next(goto_stmt) == label_stmt) {
+ // Simply eliminate the goto if the label is next to it
+ goto_stmt->up->children.erase(goto_stmt);
+ } else if (AreOrdered(goto_stmt, label_stmt)) {
+ // Eliminate goto_stmt with a conditional
+ EliminateAsConditional(goto_stmt, label_stmt);
+ } else {
+ // Eliminate goto_stmt with a loop
+ EliminateAsLoop(goto_stmt, label_stmt);
+ }
+ }
+
+ std::vector<Node> BuildTree(Flow::CFG& cfg) {
+ u32 label_id{0};
+ std::vector<Node> gotos;
+ Flow::Function& first_function{cfg.Functions().front()};
+ BuildTree(cfg, first_function, label_id, gotos, root_stmt.children.end(), std::nullopt);
+ return gotos;
+ }
+
+ void BuildTree(Flow::CFG& cfg, Flow::Function& function, u32& label_id,
+ std::vector<Node>& gotos, Node function_insert_point,
+ std::optional<Node> return_label) {
+ Statement* const false_stmt{pool.Create(Identity{}, IR::Condition{false}, &root_stmt)};
+ Tree& root{root_stmt.children};
+ std::unordered_map<Flow::Block*, Node> local_labels;
+ local_labels.reserve(function.blocks.size());
+
+ for (Flow::Block& block : function.blocks) {
+ Statement* const label{pool.Create(Label{}, label_id, &root_stmt)};
+ const Node label_it{root.insert(function_insert_point, *label)};
+ local_labels.emplace(&block, label_it);
+ ++label_id;
+ }
+ for (Flow::Block& block : function.blocks) {
+ const Node label{local_labels.at(&block)};
+ // Insertion point
+ const Node ip{std::next(label)};
+
+ // Reset goto variables before the first block and after its respective label
+ const auto make_reset_variable{[&]() -> Statement& {
+ return *pool.Create(SetVariable{}, label->id, false_stmt, &root_stmt);
+ }};
+ root.push_front(make_reset_variable());
+ root.insert(ip, make_reset_variable());
+ root.insert(ip, *pool.Create(&block, &root_stmt));
+
+ switch (block.end_class) {
+ case Flow::EndClass::Branch: {
+ Statement* const always_cond{
+ pool.Create(Identity{}, IR::Condition{true}, &root_stmt)};
+ if (block.cond == IR::Condition{true}) {
+ const Node true_label{local_labels.at(block.branch_true)};
+ gotos.push_back(
+ root.insert(ip, *pool.Create(Goto{}, always_cond, true_label, &root_stmt)));
+ } else if (block.cond == IR::Condition{false}) {
+ const Node false_label{local_labels.at(block.branch_false)};
+ gotos.push_back(root.insert(
+ ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt)));
+ } else {
+ const Node true_label{local_labels.at(block.branch_true)};
+ const Node false_label{local_labels.at(block.branch_false)};
+ Statement* const true_cond{pool.Create(Identity{}, block.cond, &root_stmt)};
+ gotos.push_back(
+ root.insert(ip, *pool.Create(Goto{}, true_cond, true_label, &root_stmt)));
+ gotos.push_back(root.insert(
+ ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt)));
+ }
+ break;
+ }
+ case Flow::EndClass::IndirectBranch:
+ root.insert(ip, *pool.Create(SetIndirectBranchVariable{}, block.branch_reg,
+ block.branch_offset, &root_stmt));
+ for (const Flow::IndirectBranch& indirect : block.indirect_branches) {
+ const Node indirect_label{local_labels.at(indirect.block)};
+ Statement* cond{
+ pool.Create(IndirectBranchCond{}, indirect.address, &root_stmt)};
+ Statement* goto_stmt{pool.Create(Goto{}, cond, indirect_label, &root_stmt)};
+ gotos.push_back(root.insert(ip, *goto_stmt));
+ }
+ root.insert(ip, *pool.Create(Unreachable{}, &root_stmt));
+ break;
+ case Flow::EndClass::Call: {
+ Flow::Function& call{cfg.Functions()[block.function_call]};
+ const Node call_return_label{local_labels.at(block.return_block)};
+ BuildTree(cfg, call, label_id, gotos, ip, call_return_label);
+ break;
+ }
+ case Flow::EndClass::Exit:
+ root.insert(ip, *pool.Create(Return{}, &root_stmt));
+ break;
+ case Flow::EndClass::Return: {
+ Statement* const always_cond{pool.Create(Identity{}, block.cond, &root_stmt)};
+ auto goto_stmt{pool.Create(Goto{}, always_cond, return_label.value(), &root_stmt)};
+ gotos.push_back(root.insert(ip, *goto_stmt));
+ break;
+ }
+ case Flow::EndClass::Kill:
+ root.insert(ip, *pool.Create(Kill{}, &root_stmt));
+ break;
+ }
+ }
+ }
+
+ void UpdateTreeUp(Statement* tree) {
+ for (Statement& stmt : tree->children) {
+ stmt.up = tree;
+ }
+ }
+
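+ // Replaces a forward goto with a conditional:
+ //     if (cond) goto L; s_1; ...; s_n; L:
+ // becomes
+ //     if (!cond) { s_1; ...; s_n; } L: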
+ void EliminateAsConditional(Node goto_stmt, Node label_stmt) {
+ Tree& body{goto_stmt->up->children};
+ Tree if_body;
+ if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_stmt);
+ Statement* const cond{pool.Create(Not{}, goto_stmt->cond, &root_stmt)};
+ Statement* const if_stmt{pool.Create(If{}, cond, std::move(if_body), goto_stmt->up)};
+ UpdateTreeUp(if_stmt);
+ body.insert(goto_stmt, *if_stmt);
+ body.erase(goto_stmt);
+ }
+
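+ // Replaces a backward goto with a post-tested loop:
+ //     L: s_1; ...; s_n; if (cond) goto L;
+ // becomes
+ //     do { L: s_1; ...; s_n; } while (cond);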
+ void EliminateAsLoop(Node goto_stmt, Node label_stmt) {
+ Tree& body{goto_stmt->up->children};
+ Tree loop_body;
+ loop_body.splice(loop_body.begin(), body, label_stmt, goto_stmt);
+ Statement* const cond{goto_stmt->cond};
+ Statement* const loop{pool.Create(Loop{}, cond, std::move(loop_body), goto_stmt->up)};
+ UpdateTreeUp(loop);
+ body.insert(goto_stmt, *loop);
+ body.erase(goto_stmt);
+ }
+
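+ // Moves a goto one level out of the if or loop that directly contains it,
+ // capturing its condition in a goto variable (and using a conditional break in
+ // the loop case).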
+ [[nodiscard]] Node MoveOutward(Node goto_stmt) {
+ switch (goto_stmt->up->type) {
+ case StatementType::If:
+ return MoveOutwardIf(goto_stmt);
+ case StatementType::Loop:
+ return MoveOutwardLoop(goto_stmt);
+ default:
+ throw LogicError("Invalid outward movement");
+ }
+ }
+
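+ // Moves a goto one level into the construct that holds its label. The goto
+ // condition is captured in goto_L{id}, the statements between the goto and that
+ // construct are guarded with if (!goto_L), the target if's condition gains
+ // "goto_L ||", and a new goto on goto_L is placed at the start of the
+ // construct's body.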
+ [[nodiscard]] Node MoveInward(Node goto_stmt) {
+ Statement* const parent{goto_stmt->up};
+ Tree& body{parent->children};
+ const Node label{goto_stmt->label};
+ const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)};
+ const u32 label_id{label->id};
+
+ Statement* const goto_cond{goto_stmt->cond};
+ Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)};
+ body.insert(goto_stmt, *set_var);
+
+ Tree if_body;
+ if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_nested_stmt);
+ Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)};
+ Statement* const neg_var{pool.Create(Not{}, variable, &root_stmt)};
+ if (!if_body.empty()) {
+ Statement* const if_stmt{pool.Create(If{}, neg_var, std::move(if_body), parent)};
+ UpdateTreeUp(if_stmt);
+ body.insert(goto_stmt, *if_stmt);
+ }
+ body.erase(goto_stmt);
+
+ switch (label_nested_stmt->type) {
+ case StatementType::If:
+ // Update nested if condition
+ label_nested_stmt->cond =
+ pool.Create(Or{}, variable, label_nested_stmt->cond, &root_stmt);
+ break;
+ case StatementType::Loop:
+ break;
+ default:
+ throw LogicError("Invalid inward movement");
+ }
+ Tree& nested_tree{label_nested_stmt->children};
+ Statement* const new_goto{pool.Create(Goto{}, variable, label, &*label_nested_stmt)};
+ return nested_tree.insert(nested_tree.begin(), *new_goto);
+ }
+
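+ // Lifts a goto above the construct that holds its label by wrapping the
+ // intervening siblings in do { if (goto_L) goto L; ...; goto_L = cond; } while
+ // (goto_L), leaving the new goto before the label's construct.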
+ [[nodiscard]] Node Lift(Node goto_stmt) {
+ Statement* const parent{goto_stmt->up};
+ Tree& body{parent->children};
+ const Node label{goto_stmt->label};
+ const u32 label_id{label->id};
+ const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)};
+
+ Tree loop_body;
+ loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt);
+ SanitizeNoBreaks(loop_body);
+ Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)};
+ Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)};
+ UpdateTreeUp(loop_stmt);
+ body.insert(goto_stmt, *loop_stmt);
+
+ Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)};
+ loop_stmt->children.push_front(*new_goto);
+ const Node new_goto_node{loop_stmt->children.begin()};
+
+ Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_stmt->cond, loop_stmt)};
+ loop_stmt->children.push_back(*set_var);
+
+ body.erase(goto_stmt);
+ return new_goto_node;
+ }
+
+ Node MoveOutwardIf(Node goto_stmt) {
+ const Node parent{Tree::s_iterator_to(*goto_stmt->up)};
+ Tree& body{parent->children};
+ const u32 label_id{goto_stmt->label->id};
+ Statement* const goto_cond{goto_stmt->cond};
+ Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, &*parent)};
+ body.insert(goto_stmt, *set_goto_var);
+
+ Tree if_body;
+ if_body.splice(if_body.begin(), body, std::next(goto_stmt), body.end());
+ if_body.pop_front();
+ Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)};
+ Statement* const neg_cond{pool.Create(Not{}, cond, &root_stmt)};
+ Statement* const if_stmt{pool.Create(If{}, neg_cond, std::move(if_body), &*parent)};
+ UpdateTreeUp(if_stmt);
+ body.insert(goto_stmt, *if_stmt);
+
+ body.erase(goto_stmt);
+
+ Statement* const new_cond{pool.Create(Variable{}, label_id, &root_stmt)};
+ Statement* const new_goto{pool.Create(Goto{}, new_cond, goto_stmt->label, parent->up)};
+ Tree& parent_tree{parent->up->children};
+ return parent_tree.insert(std::next(parent), *new_goto);
+ }
+
+ Node MoveOutwardLoop(Node goto_stmt) {
+ Statement* const parent{goto_stmt->up};
+ Tree& body{parent->children};
+ const u32 label_id{goto_stmt->label->id};
+ Statement* const goto_cond{goto_stmt->cond};
+ Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)};
+ Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)};
+ Statement* const break_stmt{pool.Create(Break{}, cond, parent)};
+ body.insert(goto_stmt, *set_goto_var);
+ body.insert(goto_stmt, *break_stmt);
+ body.erase(goto_stmt);
+
+ const Node loop{Tree::s_iterator_to(*goto_stmt->up)};
+ Statement* const new_goto_cond{pool.Create(Variable{}, label_id, &root_stmt)};
+ Statement* const new_goto{pool.Create(Goto{}, new_goto_cond, goto_stmt->label, loop->up)};
+ Tree& parent_tree{loop->up->children};
+ return parent_tree.insert(std::next(loop), *new_goto);
+ }
+
+ ObjectPool<Statement>& pool;
+ Statement root_stmt{FunctionTag{}};
+};
+
+[[nodiscard]] Statement* TryFindForwardBlock(Statement& stmt) {
+ Tree& tree{stmt.up->children};
+ const Node end{tree.end()};
+ Node forward_node{std::next(Tree::s_iterator_to(stmt))};
+ while (forward_node != end && !HasChildren(forward_node->type)) {
+ if (forward_node->type == StatementType::Code) {
+ return &*forward_node;
+ }
+ ++forward_node;
+ }
+ return nullptr;
+}
+
+[[nodiscard]] IR::U1 VisitExpr(IR::IREmitter& ir, const Statement& stmt) {
+ switch (stmt.type) {
+ case StatementType::Identity:
+ return ir.Condition(stmt.guest_cond);
+ case StatementType::Not:
+ return ir.LogicalNot(IR::U1{VisitExpr(ir, *stmt.op)});
+ case StatementType::Or:
+ return ir.LogicalOr(VisitExpr(ir, *stmt.op_a), VisitExpr(ir, *stmt.op_b));
+ case StatementType::Variable:
+ return ir.GetGotoVariable(stmt.id);
+ case StatementType::IndirectBranchCond:
+ return ir.IEqual(ir.GetIndirectBranchVariable(), ir.Imm32(stmt.location));
+ default:
+ throw NotImplementedException("Statement type {}", stmt.type);
+ }
+}
+
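+// Lowers the goto-free statement tree into IR: Code statements are translated
+// into IR blocks, while If/Loop/Break/Return/Kill/Unreachable statements become
+// abstract syntax nodes (If/EndIf, Loop/Repeat, Break, Return, Unreachable) for
+// the backends to consume.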
+class TranslatePass {
+public:
+ TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_,
+ ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt,
+ IR::AbstractSyntaxList& syntax_list_)
+ : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_},
+ syntax_list{syntax_list_} {
+ Visit(root_stmt, nullptr, nullptr);
+
+ IR::Block& first_block{*syntax_list.front().data.block};
+ IR::IREmitter ir(first_block, first_block.begin());
+ ir.Prologue();
+ }
+
+private:
+ void Visit(Statement& parent, IR::Block* break_block, IR::Block* fallthrough_block) {
+ IR::Block* current_block{};
+ const auto ensure_block{[&] {
+ if (current_block) {
+ return;
+ }
+ current_block = block_pool.Create(inst_pool);
+ auto& node{syntax_list.emplace_back()};
+ node.type = IR::AbstractSyntaxNode::Type::Block;
+ node.data.block = current_block;
+ }};
+ Tree& tree{parent.children};
+ for (auto it = tree.begin(); it != tree.end(); ++it) {
+ Statement& stmt{*it};
+ switch (stmt.type) {
+ case StatementType::Label:
+ // Labels can be ignored
+ break;
+ case StatementType::Code: {
+ ensure_block();
+ Translate(env, current_block, stmt.block->begin.Offset(), stmt.block->end.Offset());
+ break;
+ }
+ case StatementType::SetVariable: {
+ ensure_block();
+ IR::IREmitter ir{*current_block};
+ ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op));
+ break;
+ }
+ case StatementType::SetIndirectBranchVariable: {
+ ensure_block();
+ IR::IREmitter ir{*current_block};
+ IR::U32 address{ir.IAdd(ir.GetReg(stmt.branch_reg), ir.Imm32(stmt.branch_offset))};
+ ir.SetIndirectBranchVariable(address);
+ break;
+ }
+ case StatementType::If: {
+ ensure_block();
+ IR::Block* const merge_block{MergeBlock(parent, stmt)};
+
+ // Implement if header block
+ IR::IREmitter ir{*current_block};
+ const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))};
+
+ const size_t if_node_index{syntax_list.size()};
+ syntax_list.emplace_back();
+
+ // Visit children
+ const size_t then_block_index{syntax_list.size()};
+ Visit(stmt, break_block, merge_block);
+
+ IR::Block* const then_block{syntax_list.at(then_block_index).data.block};
+ current_block->AddBranch(then_block);
+ current_block->AddBranch(merge_block);
+ current_block = merge_block;
+
+ auto& if_node{syntax_list[if_node_index]};
+ if_node.type = IR::AbstractSyntaxNode::Type::If;
+ if_node.data.if_node.cond = cond;
+ if_node.data.if_node.body = then_block;
+ if_node.data.if_node.merge = merge_block;
+
+ auto& endif_node{syntax_list.emplace_back()};
+ endif_node.type = IR::AbstractSyntaxNode::Type::EndIf;
+ endif_node.data.end_if.merge = merge_block;
+
+ auto& merge{syntax_list.emplace_back()};
+ merge.type = IR::AbstractSyntaxNode::Type::Block;
+ merge.data.block = merge_block;
+ break;
+ }
+ case StatementType::Loop: {
+ IR::Block* const loop_header_block{block_pool.Create(inst_pool)};
+ if (current_block) {
+ current_block->AddBranch(loop_header_block);
+ }
+ auto& header_node{syntax_list.emplace_back()};
+ header_node.type = IR::AbstractSyntaxNode::Type::Block;
+ header_node.data.block = loop_header_block;
+
+ IR::Block* const continue_block{block_pool.Create(inst_pool)};
+ IR::Block* const merge_block{MergeBlock(parent, stmt)};
+
+ const size_t loop_node_index{syntax_list.size()};
+ syntax_list.emplace_back();
+
+ // Visit children
+ const size_t body_block_index{syntax_list.size()};
+ Visit(stmt, merge_block, continue_block);
+
+ // The continue block is located at the end of the loop
+ IR::IREmitter ir{*continue_block};
+ const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))};
+
+ IR::Block* const body_block{syntax_list.at(body_block_index).data.block};
+ loop_header_block->AddBranch(body_block);
+
+ continue_block->AddBranch(loop_header_block);
+ continue_block->AddBranch(merge_block);
+
+ current_block = merge_block;
+
+ auto& loop{syntax_list[loop_node_index]};
+ loop.type = IR::AbstractSyntaxNode::Type::Loop;
+ loop.data.loop.body = body_block;
+ loop.data.loop.continue_block = continue_block;
+ loop.data.loop.merge = merge_block;
+
+ auto& continue_block_node{syntax_list.emplace_back()};
+ continue_block_node.type = IR::AbstractSyntaxNode::Type::Block;
+ continue_block_node.data.block = continue_block;
+
+ auto& repeat{syntax_list.emplace_back()};
+ repeat.type = IR::AbstractSyntaxNode::Type::Repeat;
+ repeat.data.repeat.cond = cond;
+ repeat.data.repeat.loop_header = loop_header_block;
+ repeat.data.repeat.merge = merge_block;
+
+ auto& merge{syntax_list.emplace_back()};
+ merge.type = IR::AbstractSyntaxNode::Type::Block;
+ merge.data.block = merge_block;
+ break;
+ }
+ case StatementType::Break: {
+ ensure_block();
+ IR::Block* const skip_block{MergeBlock(parent, stmt)};
+
+ IR::IREmitter ir{*current_block};
+ const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))};
+ current_block->AddBranch(break_block);
+ current_block->AddBranch(skip_block);
+ current_block = skip_block;
+
+ auto& break_node{syntax_list.emplace_back()};
+ break_node.type = IR::AbstractSyntaxNode::Type::Break;
+ break_node.data.break_node.cond = cond;
+ break_node.data.break_node.merge = break_block;
+ break_node.data.break_node.skip = skip_block;
+
+ auto& merge{syntax_list.emplace_back()};
+ merge.type = IR::AbstractSyntaxNode::Type::Block;
+ merge.data.block = skip_block;
+ break;
+ }
+ case StatementType::Return: {
+ ensure_block();
+ IR::IREmitter{*current_block}.Epilogue();
+ current_block = nullptr;
+ syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return;
+ break;
+ }
+ case StatementType::Kill: {
+ ensure_block();
+ IR::Block* demote_block{MergeBlock(parent, stmt)};
+ IR::IREmitter{*current_block}.DemoteToHelperInvocation();
+ current_block->AddBranch(demote_block);
+ current_block = demote_block;
+
+ auto& merge{syntax_list.emplace_back()};
+ merge.type = IR::AbstractSyntaxNode::Type::Block;
+ merge.data.block = demote_block;
+ break;
+ }
+ case StatementType::Unreachable: {
+ ensure_block();
+ current_block = nullptr;
+ syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable;
+ break;
+ }
+ default:
+ throw NotImplementedException("Statement type {}", stmt.type);
+ }
+ }
+ if (current_block) {
+ if (fallthrough_block) {
+ current_block->AddBranch(fallthrough_block);
+ } else {
+ syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable;
+ }
+ }
+ }
+
+ IR::Block* MergeBlock(Statement& parent, Statement& stmt) {
+ Statement* merge_stmt{TryFindForwardBlock(stmt)};
+ if (!merge_stmt) {
+ // Create a merge block we can visit later
+ merge_stmt = stmt_pool.Create(&dummy_flow_block, &parent);
+ parent.children.insert(std::next(Tree::s_iterator_to(stmt)), *merge_stmt);
+ }
+ return block_pool.Create(inst_pool);
+ }
+
+ ObjectPool<Statement>& stmt_pool;
+ ObjectPool<IR::Inst>& inst_pool;
+ ObjectPool<IR::Block>& block_pool;
+ Environment& env;
+ IR::AbstractSyntaxList& syntax_list;
+
+// TODO(C++20): Remove this when all compilers support constexpr std::vector
+#if __cpp_lib_constexpr_vector >= 201907
+ static constexpr Flow::Block dummy_flow_block;
+#else
+ const Flow::Block dummy_flow_block;
+#endif
+};
+} // Anonymous namespace
+
+IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
+ Environment& env, Flow::CFG& cfg) {
+ ObjectPool<Statement> stmt_pool{64};
+ GotoPass goto_pass{cfg, stmt_pool};
+ Statement& root{goto_pass.RootStatement()};
+ IR::AbstractSyntaxList syntax_list;
+ TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list};
+ return syntax_list;
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
new file mode 100644
index 000000000..88b083649
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
@@ -0,0 +1,20 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/abstract_syntax_list.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/object_pool.h"
+
+namespace Shader::Maxwell {
+
+[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool,
+ ObjectPool<IR::Block>& block_pool, Environment& env,
+ Flow::CFG& cfg);
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
new file mode 100644
index 000000000..d9f999e05
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
@@ -0,0 +1,214 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class AtomOp : u64 {
+ ADD,
+ MIN,
+ MAX,
+ INC,
+ DEC,
+ AND,
+ OR,
+ XOR,
+ EXCH,
+ SAFEADD,
+};
+
+enum class AtomSize : u64 {
+ U32,
+ S32,
+ U64,
+ F32,
+ F16x2,
+ S64,
+};
+
+IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::U32U64& op_b,
+ AtomOp op, bool is_signed) {
+ switch (op) {
+ case AtomOp::ADD:
+ return ir.GlobalAtomicIAdd(offset, op_b);
+ case AtomOp::MIN:
+ return ir.GlobalAtomicIMin(offset, op_b, is_signed);
+ case AtomOp::MAX:
+ return ir.GlobalAtomicIMax(offset, op_b, is_signed);
+ case AtomOp::INC:
+ return ir.GlobalAtomicInc(offset, op_b);
+ case AtomOp::DEC:
+ return ir.GlobalAtomicDec(offset, op_b);
+ case AtomOp::AND:
+ return ir.GlobalAtomicAnd(offset, op_b);
+ case AtomOp::OR:
+ return ir.GlobalAtomicOr(offset, op_b);
+ case AtomOp::XOR:
+ return ir.GlobalAtomicXor(offset, op_b);
+ case AtomOp::EXCH:
+ return ir.GlobalAtomicExchange(offset, op_b);
+ default:
+ throw NotImplementedException("Integer Atom Operation {}", op);
+ }
+}
+
+IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op,
+ AtomSize size) {
+ static constexpr IR::FpControl f16_control{
+ .no_contraction = false,
+ .rounding = IR::FpRounding::RN,
+ .fmz_mode = IR::FmzMode::DontCare,
+ };
+ static constexpr IR::FpControl f32_control{
+ .no_contraction = false,
+ .rounding = IR::FpRounding::RN,
+ .fmz_mode = IR::FmzMode::FTZ,
+ };
+ switch (op) {
+ case AtomOp::ADD:
+ return size == AtomSize::F32 ? ir.GlobalAtomicF32Add(offset, op_b, f32_control)
+ : ir.GlobalAtomicF16x2Add(offset, op_b, f16_control);
+ case AtomOp::MIN:
+ return ir.GlobalAtomicF16x2Min(offset, op_b, f16_control);
+ case AtomOp::MAX:
+ return ir.GlobalAtomicF16x2Max(offset, op_b, f16_control);
+ default:
+ throw NotImplementedException("FP Atom Operation {}", op);
+ }
+}
+
+IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) {
+ union {
+ u64 raw;
+ BitField<8, 8, IR::Reg> addr_reg;
+ BitField<28, 20, s64> addr_offset;
+ BitField<28, 20, u64> rz_addr_offset;
+ BitField<48, 1, u64> e;
+ } const mem{insn};
+
+ const IR::U64 address{[&]() -> IR::U64 {
+ if (mem.e == 0) {
+ return v.ir.UConvert(64, v.X(mem.addr_reg));
+ }
+ return v.L(mem.addr_reg);
+ }()};
+ const u64 addr_offset{[&]() -> u64 {
+ if (mem.addr_reg == IR::Reg::RZ) {
+ // When RZ is used, the address is an absolute address
+ return static_cast<u64>(mem.rz_addr_offset.Value());
+ } else {
+ return static_cast<u64>(mem.addr_offset.Value());
+ }
+ }()};
+ return v.ir.IAdd(address, v.ir.Imm64(addr_offset));
+}
+
+bool AtomOpNotApplicable(AtomSize size, AtomOp op) {
+ // TODO: SAFEADD
+ switch (size) {
+ case AtomSize::S32:
+ case AtomSize::U64:
+ return (op == AtomOp::INC || op == AtomOp::DEC);
+ case AtomSize::S64:
+ return !(op == AtomOp::MIN || op == AtomOp::MAX);
+ case AtomSize::F32:
+ return op != AtomOp::ADD;
+ case AtomSize::F16x2:
+ return !(op == AtomOp::ADD || op == AtomOp::MIN || op == AtomOp::MAX);
+ default:
+ return false;
+ }
+}
+
+IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize size) {
+ switch (size) {
+ case AtomSize::U32:
+ case AtomSize::S32:
+ case AtomSize::F32:
+ case AtomSize::F16x2:
+ return ir.LoadGlobal32(offset);
+ case AtomSize::U64:
+ case AtomSize::S64:
+ return ir.PackUint2x32(ir.LoadGlobal64(offset));
+ default:
+ throw NotImplementedException("Atom Size {}", size);
+ }
+}
+
+void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) {
+ switch (size) {
+ case AtomSize::U32:
+ case AtomSize::S32:
+ case AtomSize::F16x2:
+ return v.X(dest_reg, IR::U32{result});
+ case AtomSize::U64:
+ case AtomSize::S64:
+ return v.L(dest_reg, IR::U64{result});
+ case AtomSize::F32:
+ return v.F(dest_reg, IR::F32{result});
+ default:
+ break;
+ }
+}
+
+IR::Value ApplyAtomOp(TranslatorVisitor& v, IR::Reg operand_reg, const IR::U64& offset,
+ AtomSize size, AtomOp op) {
+ switch (size) {
+ case AtomSize::U32:
+ case AtomSize::S32:
+ return ApplyIntegerAtomOp(v.ir, offset, v.X(operand_reg), op, size == AtomSize::S32);
+ case AtomSize::U64:
+ case AtomSize::S64:
+ return ApplyIntegerAtomOp(v.ir, offset, v.L(operand_reg), op, size == AtomSize::S64);
+ case AtomSize::F32:
+ return ApplyFpAtomOp(v.ir, offset, v.F(operand_reg), op, size);
+ case AtomSize::F16x2: {
+ return ApplyFpAtomOp(v.ir, offset, v.ir.UnpackFloat2x16(v.X(operand_reg)), op, size);
+ }
+ default:
+ throw NotImplementedException("Atom Size {}", size);
+ }
+}
+
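+// When the (size, op) combination is not applicable (see AtomOpNotApplicable),
+// only a plain global load is performed so the destination still gets a value.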
+void GlobalAtomic(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg,
+ const IR::U64& offset, AtomSize size, AtomOp op, bool write_dest) {
+ IR::Value result;
+ if (AtomOpNotApplicable(size, op)) {
+ result = LoadGlobal(v.ir, offset, size);
+ } else {
+ result = ApplyAtomOp(v, operand_reg, offset, size, op);
+ }
+ if (write_dest) {
+ StoreResult(v, dest_reg, result, size);
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::ATOM(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<20, 8, IR::Reg> operand_reg;
+ BitField<49, 3, AtomSize> size;
+ BitField<52, 4, AtomOp> op;
+ } const atom{insn};
+ const IR::U64 offset{AtomOffset(*this, insn)};
+ GlobalAtomic(*this, atom.dest_reg, atom.operand_reg, offset, atom.size, atom.op, true);
+}
+
+void TranslatorVisitor::RED(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> operand_reg;
+ BitField<20, 3, AtomSize> size;
+ BitField<23, 3, AtomOp> op;
+ } const red{insn};
+ const IR::U64 offset{AtomOffset(*this, insn)};
+ GlobalAtomic(*this, IR::Reg::RZ, red.operand_reg, offset, red.size, red.op, true);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
new file mode 100644
index 000000000..8b974621e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
@@ -0,0 +1,110 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class AtomOp : u64 {
+ ADD,
+ MIN,
+ MAX,
+ INC,
+ DEC,
+ AND,
+ OR,
+ XOR,
+ EXCH,
+};
+
+enum class AtomsSize : u64 {
+ U32,
+ S32,
+ U64,
+};
+
+IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op,
+ bool is_signed) {
+ switch (op) {
+ case AtomOp::ADD:
+ return ir.SharedAtomicIAdd(offset, op_b);
+ case AtomOp::MIN:
+ return ir.SharedAtomicIMin(offset, op_b, is_signed);
+ case AtomOp::MAX:
+ return ir.SharedAtomicIMax(offset, op_b, is_signed);
+ case AtomOp::INC:
+ return ir.SharedAtomicInc(offset, op_b);
+ case AtomOp::DEC:
+ return ir.SharedAtomicDec(offset, op_b);
+ case AtomOp::AND:
+ return ir.SharedAtomicAnd(offset, op_b);
+ case AtomOp::OR:
+ return ir.SharedAtomicOr(offset, op_b);
+ case AtomOp::XOR:
+ return ir.SharedAtomicXor(offset, op_b);
+ case AtomOp::EXCH:
+ return ir.SharedAtomicExchange(offset, op_b);
+ default:
+ throw NotImplementedException("Integer Atoms Operation {}", op);
+ }
+}
+
+IR::U32 AtomsOffset(TranslatorVisitor& v, u64 insn) {
+ union {
+ u64 raw;
+ BitField<8, 8, IR::Reg> offset_reg;
+ BitField<30, 22, u64> absolute_offset;
+ BitField<30, 22, s64> relative_offset;
+ } const encoding{insn};
+
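+ // Offsets are encoded in 32-bit words; shift left by 2 to convert to bytes.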
+ if (encoding.offset_reg == IR::Reg::RZ) {
+ return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset << 2));
+ } else {
+ const s32 relative{static_cast<s32>(encoding.relative_offset << 2)};
+ return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
+ }
+}
+
+void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) {
+ switch (size) {
+ case AtomsSize::U32:
+ case AtomsSize::S32:
+ return v.X(dest_reg, IR::U32{result});
+ case AtomsSize::U64:
+ return v.L(dest_reg, IR::U64{result});
+ default:
+ break;
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::ATOMS(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> addr_reg;
+ BitField<20, 8, IR::Reg> src_reg_b;
+ BitField<28, 2, AtomsSize> size;
+ BitField<52, 4, AtomOp> op;
+ } const atoms{insn};
+
+ const bool size_64{atoms.size == AtomsSize::U64};
+ if (size_64 && atoms.op != AtomOp::EXCH) {
+ throw NotImplementedException("64-bit Atoms Operation {}", atoms.op.Value());
+ }
+ const bool is_signed{atoms.size == AtomsSize::S32};
+ const IR::U32 offset{AtomsOffset(*this, insn)};
+
+ IR::Value result;
+ if (size_64) {
+ result = ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), atoms.op, is_signed);
+ } else {
+ result = ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), atoms.op, is_signed);
+ }
+ StoreResult(*this, atoms.dest_reg, result, atoms.size);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp
new file mode 100644
index 000000000..fb3f00d3f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp
@@ -0,0 +1,35 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+
+enum class BitSize : u64 {
+ B32,
+ B64,
+ B96,
+ B128,
+};
+
+void TranslatorVisitor::AL2P(u64 inst) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> result_register;
+ BitField<8, 8, IR::Reg> indexing_register;
+ BitField<20, 11, s64> offset;
+ BitField<47, 2, BitSize> bitsize;
+ } al2p{inst};
+ if (al2p.bitsize != BitSize::B32) {
+ throw NotImplementedException("BitSize {}", al2p.bitsize.Value());
+ }
+ const IR::U32 converted_offset{ir.Imm32(static_cast<u32>(al2p.offset.Value()))};
+ const IR::U32 result{ir.IAdd(X(al2p.indexing_register), converted_offset)};
+ X(al2p.result_register, result);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp
new file mode 100644
index 000000000..86e433e41
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp
@@ -0,0 +1,96 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+// Scope names appear to follow CUDA terminology.
+enum class LocalScope : u64 {
+ CTA,
+ GL,
+ SYS,
+ VC,
+};
+} // Anonymous namespace
+
+void TranslatorVisitor::MEMBAR(u64 inst) {
+ union {
+ u64 raw;
+ BitField<8, 2, LocalScope> scope;
+ } const membar{inst};
+
+ if (membar.scope == LocalScope::CTA) {
+ ir.WorkgroupMemoryBarrier();
+ } else {
+ ir.DeviceMemoryBarrier();
+ }
+}
+
+void TranslatorVisitor::DEPBAR() {
+ // DEPBAR is a no-op
+}
+
+void TranslatorVisitor::BAR(u64 insn) {
+ enum class Mode {
+ RedPopc,
+ Scan,
+ RedAnd,
+ RedOr,
+ Sync,
+ Arrive,
+ };
+ union {
+ u64 raw;
+ BitField<43, 1, u64> is_a_imm;
+ BitField<44, 1, u64> is_b_imm;
+ BitField<8, 8, u64> imm_a;
+ BitField<20, 12, u64> imm_b;
+ BitField<42, 1, u64> neg_pred;
+ BitField<39, 3, IR::Pred> pred;
+ } const bar{insn};
+
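+ // The barrier mode is spread across non-contiguous opcode bits; mask them out
+ // and match the known encodings. Only BAR.SYNC with immediate zero operands and
+ // a true predicate is supported below.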
+ const Mode mode{[insn] {
+ switch (insn & 0x0000009B00000000ULL) {
+ case 0x0000000200000000ULL:
+ return Mode::RedPopc;
+ case 0x0000000300000000ULL:
+ return Mode::Scan;
+ case 0x0000000A00000000ULL:
+ return Mode::RedAnd;
+ case 0x0000001200000000ULL:
+ return Mode::RedOr;
+ case 0x0000008000000000ULL:
+ return Mode::Sync;
+ case 0x0000008100000000ULL:
+ return Mode::Arrive;
+ }
+ throw NotImplementedException("Invalid encoding");
+ }()};
+ if (mode != Mode::Sync) {
+ throw NotImplementedException("BAR mode {}", mode);
+ }
+ if (bar.is_a_imm == 0) {
+ throw NotImplementedException("Non-immediate input A");
+ }
+ if (bar.imm_a != 0) {
+ throw NotImplementedException("Non-zero input A");
+ }
+ if (bar.is_b_imm == 0) {
+ throw NotImplementedException("Non-immediate input B");
+ }
+ if (bar.imm_b != 0) {
+ throw NotImplementedException("Non-zero input B");
+ }
+ if (bar.pred != IR::Pred::PT && bar.neg_pred != 0) {
+ throw NotImplementedException("Non-true input predicate");
+ }
+ ir.Barrier();
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp
new file mode 100644
index 000000000..9d5a87e52
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp
@@ -0,0 +1,74 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void BFE(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> offset_reg;
+ BitField<40, 1, u64> brev;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> is_signed;
+ } const bfe{insn};
+
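+ // src packs the bitfield position in bits [7:0] and its width in bits [15:8].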
+ const IR::U32 offset{v.ir.BitFieldExtract(src, v.ir.Imm32(0), v.ir.Imm32(8), false)};
+ const IR::U32 count{v.ir.BitFieldExtract(src, v.ir.Imm32(8), v.ir.Imm32(8), false)};
+
+ // Common constants
+ const IR::U32 zero{v.ir.Imm32(0)};
+ const IR::U32 one{v.ir.Imm32(1)};
+ const IR::U32 max_size{v.ir.Imm32(32)};
+ // Edge case conditions
+ const IR::U1 zero_count{v.ir.IEqual(count, zero)};
+ const IR::U1 exceed_count{v.ir.IGreaterThanEqual(v.ir.IAdd(offset, count), max_size, false)};
+ const IR::U1 replicate{v.ir.IGreaterThanEqual(offset, max_size, false)};
+
+ IR::U32 base{v.X(bfe.offset_reg)};
+ if (bfe.brev != 0) {
+ base = v.ir.BitReverse(base);
+ }
+ IR::U32 result{v.ir.BitFieldExtract(base, offset, count, bfe.is_signed != 0)};
+ if (bfe.is_signed != 0) {
+ const IR::U1 is_negative{v.ir.ILessThan(base, zero, true)};
+ const IR::U32 replicated_bit{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)};
+ const IR::U32 exceed_bit{v.ir.BitFieldExtract(base, v.ir.Imm32(31), one, false)};
+ // Replicate condition
+ result = IR::U32{v.ir.Select(replicate, replicated_bit, result)};
+ // Exceeding condition
+ const IR::U32 exceed_result{v.ir.BitFieldInsert(result, exceed_bit, v.ir.Imm32(31), one)};
+ result = IR::U32{v.ir.Select(exceed_count, exceed_result, result)};
+ }
+ // Zero count condition
+ result = IR::U32{v.ir.Select(zero_count, zero, result)};
+
+ v.X(bfe.dest_reg, result);
+
+ if (bfe.cc != 0) {
+ v.SetZFlag(v.ir.IEqual(result, zero));
+ v.SetSFlag(v.ir.ILessThan(result, zero, true));
+ v.ResetCFlag();
+ v.ResetOFlag();
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::BFE_reg(u64 insn) {
+ BFE(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::BFE_cbuf(u64 insn) {
+ BFE(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::BFE_imm(u64 insn) {
+ BFE(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp
new file mode 100644
index 000000000..1e1ec2119
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp
@@ -0,0 +1,62 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void BFI(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& base) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> insert_reg;
+ BitField<47, 1, u64> cc;
+ } const bfi{insn};
+
+ const IR::U32 zero{v.ir.Imm32(0)};
+ const IR::U32 offset{v.ir.BitFieldExtract(src_a, zero, v.ir.Imm32(8), false)};
+ const IR::U32 unsafe_count{v.ir.BitFieldExtract(src_a, v.ir.Imm32(8), v.ir.Imm32(8), false)};
+ const IR::U32 max_size{v.ir.Imm32(32)};
+
+ // Edge case conditions
+ const IR::U1 exceed_offset{v.ir.IGreaterThanEqual(offset, max_size, false)};
+ const IR::U1 exceed_count{v.ir.IGreaterThan(unsafe_count, max_size, false)};
+
+ const IR::U32 remaining_size{v.ir.ISub(max_size, offset)};
+ const IR::U32 safe_count{v.ir.Select(exceed_count, remaining_size, unsafe_count)};
+
+ const IR::U32 insert{v.X(bfi.insert_reg)};
+ IR::U32 result{v.ir.BitFieldInsert(base, insert, offset, safe_count)};
+
+ result = IR::U32{v.ir.Select(exceed_offset, base, result)};
+
+ v.X(bfi.dest_reg, result);
+ if (bfi.cc != 0) {
+ v.SetZFlag(v.ir.IEqual(result, zero));
+ v.SetSFlag(v.ir.ILessThan(result, zero, true));
+ v.ResetCFlag();
+ v.ResetOFlag();
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::BFI_reg(u64 insn) {
+ BFI(*this, insn, GetReg20(insn), GetReg39(insn));
+}
+
+void TranslatorVisitor::BFI_rc(u64 insn) {
+ BFI(*this, insn, GetReg39(insn), GetCbuf(insn));
+}
+
+void TranslatorVisitor::BFI_cr(u64 insn) {
+ BFI(*this, insn, GetCbuf(insn), GetReg39(insn));
+}
+
+void TranslatorVisitor::BFI_imm(u64 insn) {
+ BFI(*this, insn, GetImm20(insn), GetReg39(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp
new file mode 100644
index 000000000..371c0e0f7
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp
@@ -0,0 +1,36 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void Check(u64 insn) {
+ union {
+ u64 raw;
+ BitField<5, 1, u64> cbuf_mode;
+ BitField<6, 1, u64> lmt;
+ } const encoding{insn};
+
+ if (encoding.cbuf_mode != 0) {
+ throw NotImplementedException("Constant buffer mode");
+ }
+ if (encoding.lmt != 0) {
+ throw NotImplementedException("LMT");
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::BRX(u64 insn) {
+ Check(insn);
+}
+
+void TranslatorVisitor::JMX(u64 insn) {
+ Check(insn);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h
new file mode 100644
index 000000000..fd73f656c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h
@@ -0,0 +1,57 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+
+namespace Shader::Maxwell {
+
+enum class FpRounding : u64 {
+ RN,
+ RM,
+ RP,
+ RZ,
+};
+
+enum class FmzMode : u64 {
+ None,
+ FTZ,
+ FMZ,
+ INVALIDFMZ3,
+};
+
+inline IR::FpRounding CastFpRounding(FpRounding fp_rounding) {
+ switch (fp_rounding) {
+ case FpRounding::RN:
+ return IR::FpRounding::RN;
+ case FpRounding::RM:
+ return IR::FpRounding::RM;
+ case FpRounding::RP:
+ return IR::FpRounding::RP;
+ case FpRounding::RZ:
+ return IR::FpRounding::RZ;
+ }
+ throw NotImplementedException("Invalid floating-point rounding {}", fp_rounding);
+}
+
+inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) {
+ switch (fmz_mode) {
+ case FmzMode::None:
+ return IR::FmzMode::None;
+ case FmzMode::FTZ:
+ return IR::FmzMode::FTZ;
+ case FmzMode::FMZ:
+ // FMZ is manually handled in the instruction
+ return IR::FmzMode::FTZ;
+ case FmzMode::INVALIDFMZ3:
+ break;
+ }
+ throw NotImplementedException("Invalid FMZ mode {}", fmz_mode);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp
new file mode 100644
index 000000000..20458d2ad
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp
@@ -0,0 +1,153 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+
+namespace Shader::Maxwell {
+IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
+ CompareOp compare_op, bool is_signed) {
+ switch (compare_op) {
+ case CompareOp::False:
+ return ir.Imm1(false);
+ case CompareOp::LessThan:
+ return ir.ILessThan(operand_1, operand_2, is_signed);
+ case CompareOp::Equal:
+ return ir.IEqual(operand_1, operand_2);
+ case CompareOp::LessThanEqual:
+ return ir.ILessThanEqual(operand_1, operand_2, is_signed);
+ case CompareOp::GreaterThan:
+ return ir.IGreaterThan(operand_1, operand_2, is_signed);
+ case CompareOp::NotEqual:
+ return ir.INotEqual(operand_1, operand_2);
+ case CompareOp::GreaterThanEqual:
+ return ir.IGreaterThanEqual(operand_1, operand_2, is_signed);
+ case CompareOp::True:
+ return ir.Imm1(true);
+ default:
+ throw NotImplementedException("Invalid compare op {}", compare_op);
+ }
+}
+
+IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
+ CompareOp compare_op, bool is_signed) {
+ const IR::U32 zero{ir.Imm32(0)};
+ const IR::U32 carry{ir.Select(ir.GetCFlag(), ir.Imm32(1), zero)};
+ const IR::U1 z_flag{ir.GetZFlag()};
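+    // operand_1 + ~operand_2 + carry: two's complement subtraction chained with the C flag left
+    // by the previous comparison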
+ const IR::U32 intermediate{ir.IAdd(ir.IAdd(operand_1, ir.BitwiseNot(operand_2)), carry)};
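+    // Unsigned comparisons must flip the signed test on the difference whenever the operands'
+    // sign bits differ; signed comparisons never flip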
+ const IR::U1 flip_logic{is_signed ? ir.Imm1(false)
+ : ir.LogicalXor(ir.ILessThan(operand_1, zero, true),
+ ir.ILessThan(operand_2, zero, true))};
+ switch (compare_op) {
+ case CompareOp::False:
+ return ir.Imm1(false);
+ case CompareOp::LessThan:
+ return IR::U1{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true),
+ ir.ILessThan(intermediate, zero, true))};
+ case CompareOp::Equal:
+ return ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag);
+ case CompareOp::LessThanEqual: {
+ const IR::U1 base_cmp{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true),
+ ir.ILessThan(intermediate, zero, true))};
+ return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag));
+ }
+ case CompareOp::GreaterThan: {
+ const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThanEqual(intermediate, zero, true),
+ ir.IGreaterThan(intermediate, zero, true))};
+ const IR::U1 not_z{ir.LogicalNot(z_flag)};
+ return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), not_z));
+ }
+ case CompareOp::NotEqual:
+ return ir.LogicalOr(ir.INotEqual(intermediate, zero),
+ ir.LogicalAnd(ir.IEqual(intermediate, zero), ir.LogicalNot(z_flag)));
+ case CompareOp::GreaterThanEqual: {
+ const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThan(intermediate, zero, true),
+ ir.IGreaterThanEqual(intermediate, zero, true))};
+ return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag));
+ }
+ case CompareOp::True:
+ return ir.Imm1(true);
+ default:
+ throw NotImplementedException("Invalid compare op {}", compare_op);
+ }
+}
+
+IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2,
+ BooleanOp bop) {
+ switch (bop) {
+ case BooleanOp::AND:
+ return ir.LogicalAnd(predicate_1, predicate_2);
+ case BooleanOp::OR:
+ return ir.LogicalOr(predicate_1, predicate_2);
+ case BooleanOp::XOR:
+ return ir.LogicalXor(predicate_1, predicate_2);
+ default:
+ throw NotImplementedException("Invalid bop {}", bop);
+ }
+}
+
+IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op) {
+ switch (op) {
+ case PredicateOp::False:
+ return ir.Imm1(false);
+ case PredicateOp::True:
+ return ir.Imm1(true);
+ case PredicateOp::Zero:
+ return ir.IEqual(result, ir.Imm32(0));
+ case PredicateOp::NonZero:
+ return ir.INotEqual(result, ir.Imm32(0));
+ default:
+ throw NotImplementedException("Invalid Predicate operation {}", op);
+ }
+}
+
+bool IsCompareOpOrdered(FPCompareOp op) {
+ switch (op) {
+ case FPCompareOp::LTU:
+ case FPCompareOp::EQU:
+ case FPCompareOp::LEU:
+ case FPCompareOp::GTU:
+ case FPCompareOp::NEU:
+ case FPCompareOp::GEU:
+ return false;
+ default:
+ return true;
+ }
+}
+
+IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1,
+ const IR::F16F32F64& operand_2, FPCompareOp compare_op,
+ IR::FpControl control) {
+ const bool ordered{IsCompareOpOrdered(compare_op)};
+ switch (compare_op) {
+ case FPCompareOp::F:
+ return ir.Imm1(false);
+ case FPCompareOp::LT:
+ case FPCompareOp::LTU:
+ return ir.FPLessThan(operand_1, operand_2, control, ordered);
+ case FPCompareOp::EQ:
+ case FPCompareOp::EQU:
+ return ir.FPEqual(operand_1, operand_2, control, ordered);
+ case FPCompareOp::LE:
+ case FPCompareOp::LEU:
+ return ir.FPLessThanEqual(operand_1, operand_2, control, ordered);
+ case FPCompareOp::GT:
+ case FPCompareOp::GTU:
+ return ir.FPGreaterThan(operand_1, operand_2, control, ordered);
+ case FPCompareOp::NE:
+ case FPCompareOp::NEU:
+ return ir.FPNotEqual(operand_1, operand_2, control, ordered);
+ case FPCompareOp::GE:
+ case FPCompareOp::GEU:
+ return ir.FPGreaterThanEqual(operand_1, operand_2, control, ordered);
+ case FPCompareOp::NUM:
+ return ir.FPOrdered(operand_1, operand_2);
+ case FPCompareOp::Nan:
+ return ir.FPUnordered(operand_1, operand_2);
+ case FPCompareOp::T:
+ return ir.Imm1(true);
+ default:
+ throw NotImplementedException("Invalid FP compare op {}", compare_op);
+ }
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h
new file mode 100644
index 000000000..214d0af3c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h
@@ -0,0 +1,28 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+[[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1,
+ const IR::U32& operand_2, CompareOp compare_op, bool is_signed);
+
+[[nodiscard]] IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1,
+ const IR::U32& operand_2, CompareOp compare_op,
+ bool is_signed);
+
+[[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1,
+ const IR::U1& predicate_2, BooleanOp bop);
+
+[[nodiscard]] IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op);
+
+[[nodiscard]] bool IsCompareOpOrdered(FPCompareOp op);
+
+[[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1,
+ const IR::F16F32F64& operand_2, FPCompareOp compare_op,
+ IR::FpControl control = {});
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp
new file mode 100644
index 000000000..420f2fb94
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp
@@ -0,0 +1,66 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+
+void TranslatorVisitor::CSET(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 5, IR::FlowTest> cc_test;
+ BitField<39, 3, IR::Pred> bop_pred;
+ BitField<42, 1, u64> neg_bop_pred;
+ BitField<44, 1, u64> bf;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<47, 1, u64> cc;
+ } const cset{insn};
+
+ const IR::U32 one_mask{ir.Imm32(-1)};
+ const IR::U32 fp_one{ir.Imm32(0x3f800000)};
+ const IR::U32 zero{ir.Imm32(0)};
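+    // BF selects the pass value: an all-ones mask when clear, the float 1.0f when set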
+ const IR::U32 pass_result{cset.bf == 0 ? one_mask : fp_one};
+ const IR::U1 cc_test_result{ir.GetFlowTestResult(cset.cc_test)};
+ const IR::U1 bop_pred{ir.GetPred(cset.bop_pred, cset.neg_bop_pred != 0)};
+ const IR::U1 pred_result{PredicateCombine(ir, cc_test_result, bop_pred, cset.bop)};
+ const IR::U32 result{ir.Select(pred_result, pass_result, zero)};
+ X(cset.dest_reg, result);
+ if (cset.cc != 0) {
+ const IR::U1 is_zero{ir.IEqual(result, zero)};
+ SetZFlag(is_zero);
+ if (cset.bf != 0) {
+ ResetSFlag();
+ } else {
+ SetSFlag(ir.LogicalNot(is_zero));
+ }
+ ResetOFlag();
+ ResetCFlag();
+ }
+}
+
+void TranslatorVisitor::CSETP(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 3, IR::Pred> dest_pred_b;
+ BitField<3, 3, IR::Pred> dest_pred_a;
+ BitField<8, 5, IR::FlowTest> cc_test;
+ BitField<39, 3, IR::Pred> bop_pred;
+ BitField<42, 1, u64> neg_bop_pred;
+ BitField<45, 2, BooleanOp> bop;
+ } const csetp{insn};
+
+ const BooleanOp bop{csetp.bop};
+ const IR::U1 bop_pred{ir.GetPred(csetp.bop_pred, csetp.neg_bop_pred != 0)};
+ const IR::U1 cc_test_result{ir.GetFlowTestResult(csetp.cc_test)};
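+    // The first destination predicate uses the flow test as-is, the second uses it inverted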
+ const IR::U1 result_a{PredicateCombine(ir, cc_test_result, bop_pred, bop)};
+ const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(cc_test_result), bop_pred, bop)};
+ ir.SetPred(csetp.dest_pred_a, result_a);
+ ir.SetPred(csetp.dest_pred_b, result_b);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
new file mode 100644
index 000000000..5a1b3a8fc
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
@@ -0,0 +1,55 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 2, FpRounding> fp_rounding;
+ BitField<45, 1, u64> neg_b;
+ BitField<46, 1, u64> abs_a;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> neg_a;
+ BitField<49, 1, u64> abs_b;
+ } const dadd{insn};
+ if (dadd.cc != 0) {
+ throw NotImplementedException("DADD CC");
+ }
+
+ const IR::F64 src_a{v.D(dadd.src_a_reg)};
+ const IR::F64 op_a{v.ir.FPAbsNeg(src_a, dadd.abs_a != 0, dadd.neg_a != 0)};
+ const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)};
+
+ const IR::FpControl control{
+ .no_contraction = true,
+ .rounding = CastFpRounding(dadd.fp_rounding),
+ .fmz_mode = IR::FmzMode::None,
+ };
+
+ v.D(dadd.dest_reg, v.ir.FPAdd(op_a, op_b, control));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::DADD_reg(u64 insn) {
+ DADD(*this, insn, GetDoubleReg20(insn));
+}
+
+void TranslatorVisitor::DADD_cbuf(u64 insn) {
+ DADD(*this, insn, GetDoubleCbuf(insn));
+}
+
+void TranslatorVisitor::DADD_imm(u64 insn) {
+ DADD(*this, insn, GetDoubleImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp
new file mode 100644
index 000000000..1173192e4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp
@@ -0,0 +1,72 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void DSET(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ BitField<43, 1, u64> negate_a;
+ BitField<44, 1, u64> abs_b;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<47, 1, u64> cc;
+ BitField<48, 4, FPCompareOp> compare_op;
+ BitField<52, 1, u64> bf;
+ BitField<53, 1, u64> negate_b;
+ BitField<54, 1, u64> abs_a;
+ } const dset{insn};
+
+ const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dset.src_a_reg), dset.abs_a != 0, dset.negate_a != 0)};
+ const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dset.abs_b != 0, dset.negate_b != 0)};
+
+ IR::U1 pred{v.ir.GetPred(dset.pred)};
+ if (dset.neg_pred != 0) {
+ pred = v.ir.LogicalNot(pred);
+ }
+ const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, dset.compare_op)};
+ const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, dset.bop)};
+
+ const IR::U32 one_mask{v.ir.Imm32(-1)};
+ const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
+ const IR::U32 zero{v.ir.Imm32(0)};
+ const IR::U32 pass_result{dset.bf == 0 ? one_mask : fp_one};
+ const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)};
+
+ v.X(dset.dest_reg, result);
+ if (dset.cc != 0) {
+ const IR::U1 is_zero{v.ir.IEqual(result, zero)};
+ v.SetZFlag(is_zero);
+ if (dset.bf != 0) {
+ v.ResetSFlag();
+ } else {
+ v.SetSFlag(v.ir.LogicalNot(is_zero));
+ }
+ v.ResetCFlag();
+ v.ResetOFlag();
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::DSET_reg(u64 insn) {
+ DSET(*this, insn, GetDoubleReg20(insn));
+}
+
+void TranslatorVisitor::DSET_cbuf(u64 insn) {
+ DSET(*this, insn, GetDoubleCbuf(insn));
+}
+
+void TranslatorVisitor::DSET_imm(u64 insn) {
+ DSET(*this, insn, GetDoubleImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp
new file mode 100644
index 000000000..f66097014
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp
@@ -0,0 +1,58 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& src_c) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<50, 2, FpRounding> fp_rounding;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> neg_b;
+ BitField<49, 1, u64> neg_c;
+ } const dfma{insn};
+
+ if (dfma.cc != 0) {
+ throw NotImplementedException("DFMA CC");
+ }
+
+ const IR::F64 src_a{v.D(dfma.src_a_reg)};
+ const IR::F64 op_b{v.ir.FPAbsNeg(src_b, false, dfma.neg_b != 0)};
+ const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)};
+
+ const IR::FpControl control{
+ .no_contraction = true,
+ .rounding = CastFpRounding(dfma.fp_rounding),
+ .fmz_mode = IR::FmzMode::None,
+ };
+
+ v.D(dfma.dest_reg, v.ir.FPFma(src_a, op_b, op_c, control));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::DFMA_reg(u64 insn) {
+ DFMA(*this, insn, GetDoubleReg20(insn), GetDoubleReg39(insn));
+}
+
+void TranslatorVisitor::DFMA_cr(u64 insn) {
+ DFMA(*this, insn, GetDoubleCbuf(insn), GetDoubleReg39(insn));
+}
+
+void TranslatorVisitor::DFMA_rc(u64 insn) {
+ DFMA(*this, insn, GetDoubleReg39(insn), GetDoubleCbuf(insn));
+}
+
+void TranslatorVisitor::DFMA_imm(u64 insn) {
+ DFMA(*this, insn, GetDoubleImm20(insn), GetDoubleReg39(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp
new file mode 100644
index 000000000..6b551847c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp
@@ -0,0 +1,55 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void DMNMX(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ BitField<45, 1, u64> negate_b;
+ BitField<46, 1, u64> abs_a;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> negate_a;
+ BitField<49, 1, u64> abs_b;
+ } const dmnmx{insn};
+
+ if (dmnmx.cc != 0) {
+ throw NotImplementedException("DMNMX CC");
+ }
+
+ const IR::U1 pred{v.ir.GetPred(dmnmx.pred)};
+ const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dmnmx.src_a_reg), dmnmx.abs_a != 0, dmnmx.negate_a != 0)};
+ const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dmnmx.abs_b != 0, dmnmx.negate_b != 0)};
+
+ IR::F64 max{v.ir.FPMax(op_a, op_b)};
+ IR::F64 min{v.ir.FPMin(op_a, op_b)};
+
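+    // Negating the predicate inverts the selection, which is equivalent to swapping min and max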
+ if (dmnmx.neg_pred != 0) {
+ std::swap(min, max);
+ }
+ v.D(dmnmx.dest_reg, IR::F64{v.ir.Select(pred, min, max)});
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::DMNMX_reg(u64 insn) {
+ DMNMX(*this, insn, GetDoubleReg20(insn));
+}
+
+void TranslatorVisitor::DMNMX_cbuf(u64 insn) {
+ DMNMX(*this, insn, GetDoubleCbuf(insn));
+}
+
+void TranslatorVisitor::DMNMX_imm(u64 insn) {
+ DMNMX(*this, insn, GetDoubleImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp
new file mode 100644
index 000000000..c0159fb65
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp
@@ -0,0 +1,50 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 2, FpRounding> fp_rounding;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> neg;
+ } const dmul{insn};
+
+ if (dmul.cc != 0) {
+ throw NotImplementedException("DMUL CC");
+ }
+
+ const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)};
+ const IR::FpControl control{
+ .no_contraction = true,
+ .rounding = CastFpRounding(dmul.fp_rounding),
+ .fmz_mode = IR::FmzMode::None,
+ };
+
+ v.D(dmul.dest_reg, v.ir.FPMul(src_a, src_b, control));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::DMUL_reg(u64 insn) {
+ DMUL(*this, insn, GetDoubleReg20(insn));
+}
+
+void TranslatorVisitor::DMUL_cbuf(u64 insn) {
+ DMUL(*this, insn, GetDoubleCbuf(insn));
+}
+
+void TranslatorVisitor::DMUL_imm(u64 insn) {
+ DMUL(*this, insn, GetDoubleImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp
new file mode 100644
index 000000000..b8e74ee44
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp
@@ -0,0 +1,54 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void DSETP(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
+ union {
+ u64 insn;
+ BitField<0, 3, IR::Pred> dest_pred_b;
+ BitField<3, 3, IR::Pred> dest_pred_a;
+ BitField<6, 1, u64> negate_b;
+ BitField<7, 1, u64> abs_a;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 3, IR::Pred> bop_pred;
+ BitField<42, 1, u64> neg_bop_pred;
+ BitField<43, 1, u64> negate_a;
+ BitField<44, 1, u64> abs_b;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<48, 4, FPCompareOp> compare_op;
+ } const dsetp{insn};
+
+ const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dsetp.src_a_reg), dsetp.abs_a != 0, dsetp.negate_a != 0)};
+ const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dsetp.abs_b != 0, dsetp.negate_b != 0)};
+
+ const BooleanOp bop{dsetp.bop};
+ const FPCompareOp compare_op{dsetp.compare_op};
+ const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op)};
+ const IR::U1 bop_pred{v.ir.GetPred(dsetp.bop_pred, dsetp.neg_bop_pred != 0)};
+ const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)};
+ const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)};
+ v.ir.SetPred(dsetp.dest_pred_a, result_a);
+ v.ir.SetPred(dsetp.dest_pred_b, result_b);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::DSETP_reg(u64 insn) {
+ DSETP(*this, insn, GetDoubleReg20(insn));
+}
+
+void TranslatorVisitor::DSETP_cbuf(u64 insn) {
+ DSETP(*this, insn, GetDoubleCbuf(insn));
+}
+
+void TranslatorVisitor::DSETP_imm(u64 insn) {
+ DSETP(*this, insn, GetDoubleImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp
new file mode 100644
index 000000000..c2443c886
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp
@@ -0,0 +1,43 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void ExitFragment(TranslatorVisitor& v) {
+ const ProgramHeader sph{v.env.SPH()};
+ IR::Reg src_reg{IR::Reg::R0};
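+    // Color outputs are read from consecutive registers starting at R0, one register per
+    // enabled component of each render target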
+ for (u32 render_target = 0; render_target < 8; ++render_target) {
+ const std::array<bool, 4> mask{sph.ps.EnabledOutputComponents(render_target)};
+ for (u32 component = 0; component < 4; ++component) {
+ if (!mask[component]) {
+ continue;
+ }
+ v.ir.SetFragColor(render_target, component, v.F(src_reg));
+ ++src_reg;
+ }
+ }
+ if (sph.ps.omap.sample_mask != 0) {
+ v.ir.SetSampleMask(v.X(src_reg));
+ }
+ if (sph.ps.omap.depth != 0) {
+ v.ir.SetFragDepth(v.F(src_reg + 1));
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::EXIT() {
+ switch (env.ShaderStage()) {
+ case Stage::Fragment:
+ ExitFragment(*this);
+ break;
+ default:
+ break;
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp
new file mode 100644
index 000000000..f0cb25d61
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp
@@ -0,0 +1,47 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void FLO(TranslatorVisitor& v, u64 insn, IR::U32 src) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<40, 1, u64> tilde;
+ BitField<41, 1, u64> shift;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> is_signed;
+ } const flo{insn};
+
+ if (flo.cc != 0) {
+ throw NotImplementedException("CC");
+ }
+ if (flo.tilde != 0) {
+ src = v.ir.BitwiseNot(src);
+ }
+ IR::U32 result{flo.is_signed != 0 ? v.ir.FindSMsb(src) : v.ir.FindUMsb(src)};
+ if (flo.shift != 0) {
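+        // XOR with 31 turns the most-significant-bit index into a leading-zero count while
+        // keeping the -1 "not found" sentinel intact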
+ const IR::U1 not_found{v.ir.IEqual(result, v.ir.Imm32(-1))};
+ result = IR::U32{v.ir.Select(not_found, result, v.ir.BitwiseXor(result, v.ir.Imm32(31)))};
+ }
+ v.X(flo.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::FLO_reg(u64 insn) {
+ FLO(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::FLO_cbuf(u64 insn) {
+ FLO(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::FLO_imm(u64 insn) {
+ FLO(*this, insn, GetImm20(insn));
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
new file mode 100644
index 000000000..b8c89810c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
@@ -0,0 +1,82 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding,
+ const IR::F32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a;
+ } const fadd{insn};
+
+ if (cc) {
+ throw NotImplementedException("FADD CC");
+ }
+ const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fadd.src_a), abs_a, neg_a)};
+ const IR::F32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)};
+ IR::FpControl control{
+ .no_contraction = true,
+ .rounding = CastFpRounding(fp_rounding),
+ .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+ IR::F32 value{v.ir.FPAdd(op_a, op_b, control)};
+ if (sat) {
+ value = v.ir.FPSaturate(value);
+ }
+ v.F(fadd.dest_reg, value);
+}
+
+void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
+ union {
+ u64 raw;
+ BitField<39, 2, FpRounding> fp_rounding;
+ BitField<44, 1, u64> ftz;
+ BitField<45, 1, u64> neg_b;
+ BitField<46, 1, u64> abs_a;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> neg_a;
+ BitField<49, 1, u64> abs_b;
+ BitField<50, 1, u64> sat;
+ } const fadd{insn};
+
+ FADD(v, insn, fadd.sat != 0, fadd.cc != 0, fadd.ftz != 0, fadd.fp_rounding, src_b,
+ fadd.abs_a != 0, fadd.neg_a != 0, fadd.abs_b != 0, fadd.neg_b != 0);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::FADD_reg(u64 insn) {
+ FADD(*this, insn, GetFloatReg20(insn));
+}
+
+void TranslatorVisitor::FADD_cbuf(u64 insn) {
+ FADD(*this, insn, GetFloatCbuf(insn));
+}
+
+void TranslatorVisitor::FADD_imm(u64 insn) {
+ FADD(*this, insn, GetFloatImm20(insn));
+}
+
+void TranslatorVisitor::FADD32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<55, 1, u64> ftz;
+ BitField<56, 1, u64> neg_a;
+ BitField<54, 1, u64> abs_a;
+ BitField<52, 1, u64> cc;
+ BitField<53, 1, u64> neg_b;
+ BitField<57, 1, u64> abs_b;
+ } const fadd32i{insn};
+
+ FADD(*this, insn, false, fadd32i.cc != 0, fadd32i.ftz != 0, FpRounding::RN, GetFloatImm32(insn),
+ fadd32i.abs_a != 0, fadd32i.neg_a != 0, fadd32i.abs_b != 0, fadd32i.neg_b != 0);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp
new file mode 100644
index 000000000..7127ebf54
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp
@@ -0,0 +1,55 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& operand) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<47, 1, u64> ftz;
+ BitField<48, 4, FPCompareOp> compare_op;
+ } const fcmp{insn};
+
+ const IR::F32 zero{v.ir.Imm32(0.0f)};
+ const IR::FpControl control{.fmz_mode = (fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None)};
+ const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)};
+ const IR::U32 src_reg{v.X(fcmp.src_reg)};
+ const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)};
+
+ v.X(fcmp.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::FCMP_reg(u64 insn) {
+ FCMP(*this, insn, GetReg20(insn), GetFloatReg39(insn));
+}
+
+void TranslatorVisitor::FCMP_rc(u64 insn) {
+ FCMP(*this, insn, GetReg39(insn), GetFloatCbuf(insn));
+}
+
+void TranslatorVisitor::FCMP_cr(u64 insn) {
+ FCMP(*this, insn, GetCbuf(insn), GetFloatReg39(insn));
+}
+
+void TranslatorVisitor::FCMP_imm(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 19, u64> value;
+ BitField<56, 1, u64> is_negative;
+ } const fcmp{insn};
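+    // The 19-bit immediate holds the upper bits of an F32; shift it into position and apply
+    // the separately encoded sign bit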
+ const u32 sign_bit{fcmp.is_negative != 0 ? (1U << 31) : 0};
+ const u32 value{static_cast<u32>(fcmp.value) << 12};
+
+ FCMP(*this, insn, ir.Imm32(value | sign_bit), GetFloatReg39(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp
new file mode 100644
index 000000000..eece4f28f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp
@@ -0,0 +1,78 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ BitField<43, 1, u64> negate_a;
+ BitField<44, 1, u64> abs_b;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<47, 1, u64> cc;
+ BitField<48, 4, FPCompareOp> compare_op;
+ BitField<52, 1, u64> bf;
+ BitField<53, 1, u64> negate_b;
+ BitField<54, 1, u64> abs_a;
+ BitField<55, 1, u64> ftz;
+ } const fset{insn};
+
+ const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fset.src_a_reg), fset.abs_a != 0, fset.negate_a != 0)};
+    const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fset.abs_b != 0, fset.negate_b != 0)};
+ const IR::FpControl control{
+ .no_contraction = false,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = (fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+
+ IR::U1 pred{v.ir.GetPred(fset.pred)};
+ if (fset.neg_pred != 0) {
+ pred = v.ir.LogicalNot(pred);
+ }
+ const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, fset.compare_op, control)};
+ const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, fset.bop)};
+
+ const IR::U32 one_mask{v.ir.Imm32(-1)};
+ const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
+ const IR::U32 zero{v.ir.Imm32(0)};
+ const IR::U32 pass_result{fset.bf == 0 ? one_mask : fp_one};
+ const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)};
+
+ v.X(fset.dest_reg, result);
+ if (fset.cc != 0) {
+ const IR::U1 is_zero{v.ir.IEqual(result, zero)};
+ v.SetZFlag(is_zero);
+ if (fset.bf != 0) {
+ v.ResetSFlag();
+ } else {
+ v.SetSFlag(v.ir.LogicalNot(is_zero));
+ }
+ v.ResetCFlag();
+ v.ResetOFlag();
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::FSET_reg(u64 insn) {
+ FSET(*this, insn, GetFloatReg20(insn));
+}
+
+void TranslatorVisitor::FSET_cbuf(u64 insn) {
+ FSET(*this, insn, GetFloatCbuf(insn));
+}
+
+void TranslatorVisitor::FSET_imm(u64 insn) {
+ FSET(*this, insn, GetFloatImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp
new file mode 100644
index 000000000..02ab023c1
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp
@@ -0,0 +1,214 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class FloatFormat : u64 {
+ F16 = 1,
+ F32 = 2,
+ F64 = 3,
+};
+
+enum class RoundingOp : u64 {
+ None = 0,
+ Pass = 3,
+ Round = 8,
+ Floor = 9,
+ Ceil = 10,
+ Trunc = 11,
+};
+
+[[nodiscard]] u32 WidthSize(FloatFormat width) {
+ switch (width) {
+ case FloatFormat::F16:
+ return 16;
+ case FloatFormat::F32:
+ return 32;
+ case FloatFormat::F64:
+ return 64;
+ default:
+ throw NotImplementedException("Invalid width {}", width);
+ }
+}
+
+void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<44, 1, u64> ftz;
+ BitField<45, 1, u64> neg;
+ BitField<47, 1, u64> cc;
+ BitField<50, 1, u64> sat;
+ BitField<39, 4, u64> rounding_op;
+ BitField<39, 2, FpRounding> rounding;
+ BitField<10, 2, FloatFormat> src_size;
+ BitField<8, 2, FloatFormat> dst_size;
+
+ [[nodiscard]] RoundingOp RoundingOperation() const {
+ constexpr u64 rounding_mask = 0x0B;
+ return static_cast<RoundingOp>(rounding_op.Value() & rounding_mask);
+ }
+ } const f2f{insn};
+
+ if (f2f.cc != 0) {
+ throw NotImplementedException("F2F CC");
+ }
+
+ IR::F16F32F64 input{v.ir.FPAbsNeg(src_a, abs, f2f.neg != 0)};
+
+ const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64};
+ IR::FpControl fp_control{
+ .no_contraction = false,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = (f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+ if (f2f.src_size != f2f.dst_size) {
+ fp_control.rounding = CastFpRounding(f2f.rounding);
+ input = v.ir.FPConvert(WidthSize(f2f.dst_size), input, fp_control);
+ } else {
+ switch (f2f.RoundingOperation()) {
+ case RoundingOp::None:
+ case RoundingOp::Pass:
+ // Make sure NANs are handled properly
+ switch (f2f.src_size) {
+ case FloatFormat::F16:
+ input = v.ir.FPAdd(input, v.ir.FPConvert(16, v.ir.Imm32(0.0f)), fp_control);
+ break;
+ case FloatFormat::F32:
+ input = v.ir.FPAdd(input, v.ir.Imm32(0.0f), fp_control);
+ break;
+ case FloatFormat::F64:
+ input = v.ir.FPAdd(input, v.ir.Imm64(0.0), fp_control);
+ break;
+ }
+ break;
+ case RoundingOp::Round:
+ input = v.ir.FPRoundEven(input, fp_control);
+ break;
+ case RoundingOp::Floor:
+ input = v.ir.FPFloor(input, fp_control);
+ break;
+ case RoundingOp::Ceil:
+ input = v.ir.FPCeil(input, fp_control);
+ break;
+ case RoundingOp::Trunc:
+ input = v.ir.FPTrunc(input, fp_control);
+ break;
+ default:
+ throw NotImplementedException("Unimplemented rounding mode {}", f2f.rounding.Value());
+ }
+ }
+ if (f2f.sat != 0 && !any_fp64) {
+ input = v.ir.FPSaturate(input);
+ }
+
+ switch (f2f.dst_size) {
+ case FloatFormat::F16: {
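+        // Pack the converted half into the low 16 bits of the destination with a zero high half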
+ const IR::F16 imm{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
+ v.X(f2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(input, imm)));
+ break;
+ }
+ case FloatFormat::F32:
+ v.F(f2f.dest_reg, input);
+ break;
+ case FloatFormat::F64:
+ v.D(f2f.dest_reg, input);
+ break;
+ default:
+ throw NotImplementedException("Invalid dest format {}", f2f.dst_size.Value());
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::F2F_reg(u64 insn) {
+ union {
+ u64 insn;
+ BitField<49, 1, u64> abs;
+ BitField<10, 2, FloatFormat> src_size;
+ BitField<41, 1, u64> selector;
+ } const f2f{insn};
+
+ IR::F16F32F64 src_a;
+ switch (f2f.src_size) {
+ case FloatFormat::F16: {
+ auto [lhs_a, rhs_a]{Extract(ir, GetReg20(insn), Swizzle::H1_H0)};
+ src_a = f2f.selector != 0 ? rhs_a : lhs_a;
+ break;
+ }
+ case FloatFormat::F32:
+ src_a = GetFloatReg20(insn);
+ break;
+ case FloatFormat::F64:
+ src_a = GetDoubleReg20(insn);
+ break;
+ default:
+ throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value());
+ }
+ F2F(*this, insn, src_a, f2f.abs != 0);
+}
+
+void TranslatorVisitor::F2F_cbuf(u64 insn) {
+ union {
+ u64 insn;
+ BitField<49, 1, u64> abs;
+ BitField<10, 2, FloatFormat> src_size;
+ BitField<41, 1, u64> selector;
+ } const f2f{insn};
+
+ IR::F16F32F64 src_a;
+ switch (f2f.src_size) {
+ case FloatFormat::F16: {
+ auto [lhs_a, rhs_a]{Extract(ir, GetCbuf(insn), Swizzle::H1_H0)};
+ src_a = f2f.selector != 0 ? rhs_a : lhs_a;
+ break;
+ }
+ case FloatFormat::F32:
+ src_a = GetFloatCbuf(insn);
+ break;
+ case FloatFormat::F64:
+ src_a = GetDoubleCbuf(insn);
+ break;
+ default:
+ throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value());
+ }
+ F2F(*this, insn, src_a, f2f.abs != 0);
+}
+
+void TranslatorVisitor::F2F_imm(u64 insn) {
+ union {
+ u64 insn;
+ BitField<49, 1, u64> abs;
+ BitField<10, 2, FloatFormat> src_size;
+ BitField<41, 1, u64> selector;
+ BitField<20, 19, u64> imm;
+ BitField<56, 1, u64> imm_neg;
+ } const f2f{insn};
+
+ IR::F16F32F64 src_a;
+ switch (f2f.src_size) {
+ case FloatFormat::F16: {
+ const u32 imm{static_cast<u32>(f2f.imm & 0x0000ffff)};
+ const IR::Value vector{ir.UnpackFloat2x16(ir.Imm32(imm | (imm << 16)))};
+ src_a = IR::F16{ir.CompositeExtract(vector, f2f.selector != 0 ? 0 : 1)};
+ if (f2f.imm_neg != 0) {
+ throw NotImplementedException("Neg bit on F16");
+ }
+ break;
+ }
+ case FloatFormat::F32:
+ src_a = GetFloatImm20(insn);
+ break;
+ case FloatFormat::F64:
+ src_a = GetDoubleImm20(insn);
+ break;
+ default:
+ throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value());
+ }
+ F2F(*this, insn, src_a, f2f.abs != 0);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
new file mode 100644
index 000000000..92b1ce015
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
@@ -0,0 +1,253 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <limits>
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class DestFormat : u64 {
+ Invalid,
+ I16,
+ I32,
+ I64,
+};
+enum class SrcFormat : u64 {
+ Invalid,
+ F16,
+ F32,
+ F64,
+};
+enum class Rounding : u64 {
+ Round,
+ Floor,
+ Ceil,
+ Trunc,
+};
+
+union F2I {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 2, DestFormat> dest_format;
+ BitField<10, 2, SrcFormat> src_format;
+ BitField<12, 1, u64> is_signed;
+ BitField<39, 2, Rounding> rounding;
+ BitField<41, 1, u64> half;
+ BitField<44, 1, u64> ftz;
+ BitField<45, 1, u64> abs;
+ BitField<47, 1, u64> cc;
+ BitField<49, 1, u64> neg;
+};
+
+size_t BitSize(DestFormat dest_format) {
+ switch (dest_format) {
+ case DestFormat::I16:
+ return 16;
+ case DestFormat::I32:
+ return 32;
+ case DestFormat::I64:
+ return 64;
+ default:
+ throw NotImplementedException("Invalid destination format {}", dest_format);
+ }
+}
+
+std::pair<f64, f64> ClampBounds(DestFormat format, bool is_signed) {
+ if (is_signed) {
+ switch (format) {
+ case DestFormat::I16:
+ return {static_cast<f64>(std::numeric_limits<s16>::max()),
+ static_cast<f64>(std::numeric_limits<s16>::min())};
+ case DestFormat::I32:
+ return {static_cast<f64>(std::numeric_limits<s32>::max()),
+ static_cast<f64>(std::numeric_limits<s32>::min())};
+ case DestFormat::I64:
+ return {static_cast<f64>(std::numeric_limits<s64>::max()),
+ static_cast<f64>(std::numeric_limits<s64>::min())};
+ default:
+ break;
+ }
+ } else {
+ switch (format) {
+ case DestFormat::I16:
+ return {static_cast<f64>(std::numeric_limits<u16>::max()),
+ static_cast<f64>(std::numeric_limits<u16>::min())};
+ case DestFormat::I32:
+ return {static_cast<f64>(std::numeric_limits<u32>::max()),
+ static_cast<f64>(std::numeric_limits<u32>::min())};
+ case DestFormat::I64:
+ return {static_cast<f64>(std::numeric_limits<u64>::max()),
+ static_cast<f64>(std::numeric_limits<u64>::min())};
+ default:
+ break;
+ }
+ }
+ throw NotImplementedException("Invalid destination format {}", format);
+}
+
+IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 14, s64> offset;
+ BitField<34, 5, u64> binding;
+ } const cbuf{insn};
+ if (cbuf.binding >= 18) {
+ throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding);
+ }
+ if (cbuf.offset >= 0x4'000 || cbuf.offset < 0) {
+ throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset * 4);
+ }
+ if (cbuf.offset % 2 != 0) {
+ throw NotImplementedException("Unaligned F64 constant buffer offset {}", cbuf.offset * 4);
+ }
+ const IR::U32 binding{v.ir.Imm32(static_cast<u32>(cbuf.binding))};
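+    // Read the high word of the F64 operand (at byte offset + 4) and pack it above a zeroed
+    // low word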
+ const IR::U32 byte_offset{v.ir.Imm32(static_cast<u32>(cbuf.offset) * 4 + 4)};
+ const IR::U32 cbuf_data{v.ir.GetCbuf(binding, byte_offset)};
+ const IR::Value vector{v.ir.CompositeConstruct(v.ir.Imm32(0U), cbuf_data)};
+ return v.ir.PackDouble2x32(vector);
+}
+
+void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) {
+ // F2I is used to convert from a floating point value to an integer
+ const F2I f2i{insn};
+
+ const bool denorm_cares{f2i.src_format != SrcFormat::F16 && f2i.src_format != SrcFormat::F64 &&
+ f2i.dest_format != DestFormat::I64};
+ IR::FmzMode fmz_mode{IR::FmzMode::DontCare};
+ if (denorm_cares) {
+ fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None;
+ }
+ const IR::FpControl fp_control{
+ .no_contraction = true,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = fmz_mode,
+ };
+ const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)};
+ const IR::F16F32F64 rounded_value{[&] {
+ switch (f2i.rounding) {
+ case Rounding::Round:
+ return v.ir.FPRoundEven(op_a, fp_control);
+ case Rounding::Floor:
+ return v.ir.FPFloor(op_a, fp_control);
+ case Rounding::Ceil:
+ return v.ir.FPCeil(op_a, fp_control);
+ case Rounding::Trunc:
+ return v.ir.FPTrunc(op_a, fp_control);
+ default:
+ throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value());
+ }
+ }()};
+ const bool is_signed{f2i.is_signed != 0};
+ const auto [max_bound, min_bound] = ClampBounds(f2i.dest_format, is_signed);
+
+ IR::F16F32F64 intermediate;
+ switch (f2i.src_format) {
+ case SrcFormat::F16: {
+ const IR::F16 max_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(max_bound)))};
+ const IR::F16 min_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(min_bound)))};
+ intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
+ break;
+ }
+ case SrcFormat::F32: {
+ const IR::F32 max_val{v.ir.Imm32(static_cast<f32>(max_bound))};
+ const IR::F32 min_val{v.ir.Imm32(static_cast<f32>(min_bound))};
+ intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
+ break;
+ }
+ case SrcFormat::F64: {
+ const IR::F64 max_val{v.ir.Imm64(max_bound)};
+ const IR::F64 min_val{v.ir.Imm64(min_bound)};
+ intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
+ break;
+ }
+ default:
+ throw NotImplementedException("Invalid destination format {}", f2i.dest_format.Value());
+ }
+
+ const size_t bitsize{std::max<size_t>(32, BitSize(f2i.dest_format))};
+ IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, intermediate)};
+
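+    // When exactly one side of the conversion is 64-bit, NaN inputs map to the minimum signed
+    // value; all other signed conversions of NaN produce zero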
+ bool handled_special_case = false;
+ const bool special_nan_cases =
+ (f2i.src_format == SrcFormat::F64) != (f2i.dest_format == DestFormat::I64);
+ if (special_nan_cases) {
+ if (f2i.dest_format == DestFormat::I32) {
+ handled_special_case = true;
+ result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0x8000'0000U), result)};
+ } else if (f2i.dest_format == DestFormat::I64) {
+ handled_special_case = true;
+ result = IR::U64{
+ v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000UL), result)};
+ }
+ }
+ if (!handled_special_case && is_signed) {
+ if (bitsize != 64) {
+ result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)};
+ } else {
+ result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(u64{0}), result)};
+ }
+ }
+
+ if (bitsize == 64) {
+ v.L(f2i.dest_reg, result);
+ } else {
+ v.X(f2i.dest_reg, result);
+ }
+
+ if (f2i.cc != 0) {
+ throw NotImplementedException("F2I CC");
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::F2I_reg(u64 insn) {
+ union {
+ u64 raw;
+ F2I base;
+ BitField<20, 8, IR::Reg> src_reg;
+ } const f2i{insn};
+
+ const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 {
+ switch (f2i.base.src_format) {
+ case SrcFormat::F16:
+ return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(X(f2i.src_reg)), f2i.base.half)};
+ case SrcFormat::F32:
+ return F(f2i.src_reg);
+ case SrcFormat::F64:
+ return ir.PackDouble2x32(ir.CompositeConstruct(X(f2i.src_reg), X(f2i.src_reg + 1)));
+ default:
+ throw NotImplementedException("Invalid F2I source format {}",
+ f2i.base.src_format.Value());
+ }
+ }()};
+ TranslateF2I(*this, insn, op_a);
+}
+
+void TranslatorVisitor::F2I_cbuf(u64 insn) {
+ const F2I f2i{insn};
+ const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 {
+ switch (f2i.src_format) {
+ case SrcFormat::F16:
+ return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(GetCbuf(insn)), f2i.half)};
+ case SrcFormat::F32:
+ return GetFloatCbuf(insn);
+ case SrcFormat::F64: {
+ return UnpackCbuf(*this, insn);
+ }
+ default:
+ throw NotImplementedException("Invalid F2I source format {}", f2i.src_format.Value());
+ }
+ }()};
+ TranslateF2I(*this, insn, op_a);
+}
+
+void TranslatorVisitor::F2I_imm(u64) {
+ throw NotImplementedException("{}", Opcode::F2I_imm);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
new file mode 100644
index 000000000..fa2a7807b
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
@@ -0,0 +1,94 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c, bool neg_a,
+ bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a;
+ } const ffma{insn};
+
+ if (cc) {
+ throw NotImplementedException("FFMA CC");
+ }
+ const IR::F32 op_a{v.ir.FPAbsNeg(v.F(ffma.src_a), false, neg_a)};
+ const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
+ const IR::F32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)};
+ const IR::FpControl fp_control{
+ .no_contraction = true,
+ .rounding = CastFpRounding(fp_rounding),
+ .fmz_mode = CastFmzMode(fmz_mode),
+ };
+ IR::F32 value{v.ir.FPFma(op_a, op_b, op_c, fp_control)};
+ if (fmz_mode == FmzMode::FMZ && !sat) {
+ // Do not implement FMZ if SAT is enabled, as it does the logic for us.
+ // On D3D9 mode, anything * 0 is zero, even NAN and infinity
+ const IR::F32 zero{v.ir.Imm32(0.0f)};
+ const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)};
+ const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)};
+ const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)};
+ value = IR::F32{v.ir.Select(any_zero, op_c, value)};
+ }
+ if (sat) {
+ value = v.ir.FPSaturate(value);
+ }
+ v.F(ffma.dest_reg, value);
+}
+
+void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c) {
+ union {
+ u64 raw;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> neg_b;
+ BitField<49, 1, u64> neg_c;
+ BitField<50, 1, u64> sat;
+ BitField<51, 2, FpRounding> fp_rounding;
+ BitField<53, 2, FmzMode> fmz_mode;
+ } const ffma{insn};
+
+ FFMA(v, insn, src_b, src_c, false, ffma.neg_b != 0, ffma.neg_c != 0, ffma.sat != 0,
+ ffma.cc != 0, ffma.fmz_mode, ffma.fp_rounding);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::FFMA_reg(u64 insn) {
+ FFMA(*this, insn, GetFloatReg20(insn), GetFloatReg39(insn));
+}
+
+void TranslatorVisitor::FFMA_rc(u64 insn) {
+ FFMA(*this, insn, GetFloatReg39(insn), GetFloatCbuf(insn));
+}
+
+void TranslatorVisitor::FFMA_cr(u64 insn) {
+ FFMA(*this, insn, GetFloatCbuf(insn), GetFloatReg39(insn));
+}
+
+void TranslatorVisitor::FFMA_imm(u64 insn) {
+ FFMA(*this, insn, GetFloatImm20(insn), GetFloatReg39(insn));
+}
+
+void TranslatorVisitor::FFMA32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> src_c; // FFMA32I mirrors the destination and addition register
+ BitField<52, 1, u64> cc;
+ BitField<53, 2, FmzMode> fmz_mode;
+ BitField<55, 1, u64> sat;
+ BitField<56, 1, u64> neg_a;
+ BitField<57, 1, u64> neg_c;
+ } const ffma32i{insn};
+
+ FFMA(*this, insn, GetFloatImm32(insn), F(ffma32i.src_c), ffma32i.neg_a != 0, false,
+ ffma32i.neg_c != 0, ffma32i.sat != 0, ffma32i.cc != 0, ffma32i.fmz_mode, FpRounding::RN);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp
new file mode 100644
index 000000000..c0d6ee5af
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp
@@ -0,0 +1,62 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ BitField<44, 1, u64> ftz;
+ BitField<45, 1, u64> negate_b;
+ BitField<46, 1, u64> abs_a;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> negate_a;
+ BitField<49, 1, u64> abs_b;
+ } const fmnmx{insn};
+
+ if (fmnmx.cc) {
+ throw NotImplementedException("FMNMX CC");
+ }
+
+ const IR::U1 pred{v.ir.GetPred(fmnmx.pred)};
+ const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fmnmx.src_a_reg), fmnmx.abs_a != 0, fmnmx.negate_a != 0)};
+ const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)};
+
+ const IR::FpControl control{
+ .no_contraction = false,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = (fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+ IR::F32 max{v.ir.FPMax(op_a, op_b, control)};
+ IR::F32 min{v.ir.FPMin(op_a, op_b, control)};
+
+ if (fmnmx.neg_pred != 0) {
+ std::swap(min, max);
+ }
+
+ v.F(fmnmx.dest_reg, IR::F32{v.ir.Select(pred, min, max)});
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::FMNMX_reg(u64 insn) {
+ FMNMX(*this, insn, GetFloatReg20(insn));
+}
+
+void TranslatorVisitor::FMNMX_cbuf(u64 insn) {
+ FMNMX(*this, insn, GetFloatCbuf(insn));
+}
+
+void TranslatorVisitor::FMNMX_imm(u64 insn) {
+ FMNMX(*this, insn, GetFloatImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp
new file mode 100644
index 000000000..2f8605619
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp
@@ -0,0 +1,71 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Operation : u64 {
+ Cos = 0,
+ Sin = 1,
+ Ex2 = 2, // Base 2 exponent
+ Lg2 = 3, // Base 2 logarithm
+ Rcp = 4, // Reciprocal
+ Rsq = 5, // Reciprocal square root
+ Rcp64H = 6, // 64-bit reciprocal
+ Rsq64H = 7, // 64-bit reciprocal square root
+ Sqrt = 8,
+};
+} // Anonymous namespace
+
+void TranslatorVisitor::MUFU(u64 insn) {
+ // MUFU is used to implement a bunch of special functions. See Operation.
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<20, 4, Operation> operation;
+ BitField<46, 1, u64> abs;
+ BitField<48, 1, u64> neg;
+ BitField<50, 1, u64> sat;
+ } const mufu{insn};
+
+ const IR::F32 op_a{ir.FPAbsNeg(F(mufu.src_reg), mufu.abs != 0, mufu.neg != 0)};
+ IR::F32 value{[&]() -> IR::F32 {
+ switch (mufu.operation) {
+ case Operation::Cos:
+ return ir.FPCos(op_a);
+ case Operation::Sin:
+ return ir.FPSin(op_a);
+ case Operation::Ex2:
+ return ir.FPExp2(op_a);
+ case Operation::Lg2:
+ return ir.FPLog2(op_a);
+ case Operation::Rcp:
+ return ir.FPRecip(op_a);
+ case Operation::Rsq:
+ return ir.FPRecipSqrt(op_a);
+ case Operation::Rcp64H:
+ throw NotImplementedException("MUFU.RCP64H");
+ case Operation::Rsq64H:
+ throw NotImplementedException("MUFU.RSQ64H");
+ case Operation::Sqrt:
+ return ir.FPSqrt(op_a);
+ default:
+ throw NotImplementedException("Invalid MUFU operation {}", mufu.operation.Value());
+ }
+ }()};
+
+ if (mufu.sat) {
+ value = ir.FPSaturate(value);
+ }
+
+ F(mufu.dest_reg, value);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
new file mode 100644
index 000000000..06226b7ce
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
@@ -0,0 +1,127 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Scale : u64 {
+ None,
+ D2,
+ D4,
+ D8,
+ M8,
+ M4,
+ M2,
+ INVALIDSCALE37,
+};
+
+float ScaleFactor(Scale scale) {
+ switch (scale) {
+ case Scale::None:
+ return 1.0f;
+ case Scale::D2:
+ return 1.0f / 2.0f;
+ case Scale::D4:
+ return 1.0f / 4.0f;
+ case Scale::D8:
+ return 1.0f / 8.0f;
+ case Scale::M8:
+ return 8.0f;
+ case Scale::M4:
+ return 4.0f;
+ case Scale::M2:
+ return 2.0f;
+ case Scale::INVALIDSCALE37:
+ break;
+ }
+ throw NotImplementedException("Invalid FMUL scale {}", scale);
+}
+
+void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode,
+ FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a;
+ } const fmul{insn};
+
+ if (cc) {
+ throw NotImplementedException("FMUL CC");
+ }
+ IR::F32 op_a{v.F(fmul.src_a)};
+ if (scale != Scale::None) {
+ if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) {
+ throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers");
+ }
+ op_a = v.ir.FPMul(op_a, v.ir.Imm32(ScaleFactor(scale)));
+ }
+ const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
+ const IR::FpControl fp_control{
+ .no_contraction = true,
+ .rounding = CastFpRounding(fp_rounding),
+ .fmz_mode = CastFmzMode(fmz_mode),
+ };
+ IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)};
+ if (fmz_mode == FmzMode::FMZ && !sat) {
+        // Do not implement FMZ when SAT is enabled, as saturation already handles it.
+        // In D3D9 mode, anything * 0 is zero, even NaN and infinity
+ const IR::F32 zero{v.ir.Imm32(0.0f)};
+ const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)};
+ const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)};
+ const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)};
+ value = IR::F32{v.ir.Select(any_zero, zero, value)};
+ }
+ if (sat) {
+ value = v.ir.FPSaturate(value);
+ }
+ v.F(fmul.dest_reg, value);
+}
+
+void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
+ union {
+ u64 raw;
+ BitField<39, 2, FpRounding> fp_rounding;
+ BitField<41, 3, Scale> scale;
+ BitField<44, 2, FmzMode> fmz;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> neg_b;
+ BitField<50, 1, u64> sat;
+ } const fmul{insn};
+
+ FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0,
+ fmul.neg_b != 0);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::FMUL_reg(u64 insn) {
+ return FMUL(*this, insn, GetFloatReg20(insn));
+}
+
+void TranslatorVisitor::FMUL_cbuf(u64 insn) {
+ return FMUL(*this, insn, GetFloatCbuf(insn));
+}
+
+void TranslatorVisitor::FMUL_imm(u64 insn) {
+ return FMUL(*this, insn, GetFloatImm20(insn));
+}
+
+void TranslatorVisitor::FMUL32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<52, 1, u64> cc;
+ BitField<53, 2, FmzMode> fmz;
+ BitField<55, 1, u64> sat;
+ } const fmul32i{insn};
+
+ FMUL(*this, insn, GetFloatImm32(insn), fmul32i.fmz, FpRounding::RN, Scale::None,
+ fmul32i.sat != 0, fmul32i.cc != 0, false);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
new file mode 100644
index 000000000..f91b93fad
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
@@ -0,0 +1,41 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Mode : u64 {
+ SINCOS,
+ EX2,
+};
+
+void RRO(TranslatorVisitor& v, u64 insn, const IR::F32& src) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<39, 1, Mode> mode;
+ BitField<45, 1, u64> neg;
+ BitField<49, 1, u64> abs;
+ } const rro{insn};
+
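+    // RRO is a range-reduction pre-pass for MUFU; it is treated here as a plain move with abs/neg
+    // modifiers, assuming the IR's sin/cos/ex2 operations accept unreduced inputs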
+ v.F(rro.dest_reg, v.ir.FPAbsNeg(src, rro.abs != 0, rro.neg != 0));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::RRO_reg(u64 insn) {
+ RRO(*this, insn, GetFloatReg20(insn));
+}
+
+void TranslatorVisitor::RRO_cbuf(u64 insn) {
+ RRO(*this, insn, GetFloatCbuf(insn));
+}
+
+void TranslatorVisitor::RRO_imm(u64) {
+ throw NotImplementedException("RRO (imm)");
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp
new file mode 100644
index 000000000..5f93a1513
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp
@@ -0,0 +1,60 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
+ union {
+ u64 insn;
+ BitField<0, 3, IR::Pred> dest_pred_b;
+ BitField<3, 3, IR::Pred> dest_pred_a;
+ BitField<6, 1, u64> negate_b;
+ BitField<7, 1, u64> abs_a;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 3, IR::Pred> bop_pred;
+ BitField<42, 1, u64> neg_bop_pred;
+ BitField<43, 1, u64> negate_a;
+ BitField<44, 1, u64> abs_b;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<47, 1, u64> ftz;
+ BitField<48, 4, FPCompareOp> compare_op;
+ } const fsetp{insn};
+
+ const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)};
+ const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0);
+ const IR::FpControl control{
+ .no_contraction = false,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = (fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+
+ const BooleanOp bop{fsetp.bop};
+ const FPCompareOp compare_op{fsetp.compare_op};
+ const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op, control)};
+ const IR::U1 bop_pred{v.ir.GetPred(fsetp.bop_pred, fsetp.neg_bop_pred != 0)};
+ const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)};
+ const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)};
+ v.ir.SetPred(fsetp.dest_pred_a, result_a);
+ v.ir.SetPred(fsetp.dest_pred_b, result_b);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::FSETP_reg(u64 insn) {
+ FSETP(*this, insn, GetFloatReg20(insn));
+}
+
+void TranslatorVisitor::FSETP_cbuf(u64 insn) {
+ FSETP(*this, insn, GetFloatCbuf(insn));
+}
+
+void TranslatorVisitor::FSETP_imm(u64 insn) {
+ FSETP(*this, insn, GetFloatImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
new file mode 100644
index 000000000..7550a8d4c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
@@ -0,0 +1,44 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+void TranslatorVisitor::FSWZADD(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<28, 8, u64> swizzle;
+ BitField<38, 1, u64> ndv;
+ BitField<39, 2, FpRounding> round;
+ BitField<44, 1, u64> ftz;
+ BitField<47, 1, u64> cc;
+ } const fswzadd{insn};
+
+ if (fswzadd.ndv != 0) {
+ throw NotImplementedException("FSWZADD NDV");
+ }
+
+ const IR::F32 src_a{GetFloatReg8(insn)};
+ const IR::F32 src_b{GetFloatReg20(insn)};
+ const IR::U32 swizzle{ir.Imm32(static_cast<u32>(fswzadd.swizzle))};
+
+ const IR::FpControl fp_control{
+ .no_contraction = false,
+ .rounding = CastFpRounding(fswzadd.round),
+ .fmz_mode = (fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+
+ const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)};
+ F(fswzadd.dest_reg, result);
+
+ if (fswzadd.cc != 0) {
+ throw NotImplementedException("FSWZADD CC");
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
new file mode 100644
index 000000000..f2738a93b
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
@@ -0,0 +1,125 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a,
+ Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a;
+ } const hadd2{insn};
+
+ auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hadd2.src_a), swizzle_a)};
+ auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
+ const bool promotion{lhs_a.Type() != lhs_b.Type()};
+ if (promotion) {
+ if (lhs_a.Type() == IR::Type::F16) {
+ lhs_a = v.ir.FPConvert(32, lhs_a);
+ rhs_a = v.ir.FPConvert(32, rhs_a);
+ }
+ if (lhs_b.Type() == IR::Type::F16) {
+ lhs_b = v.ir.FPConvert(32, lhs_b);
+ rhs_b = v.ir.FPConvert(32, rhs_b);
+ }
+ }
+ lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a);
+ rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a);
+
+ lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
+ rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
+
+ const IR::FpControl fp_control{
+ .no_contraction = true,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+ IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)};
+ IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)};
+ if (sat) {
+ lhs = v.ir.FPSaturate(lhs);
+ rhs = v.ir.FPSaturate(rhs);
+ }
+ if (promotion) {
+ lhs = v.ir.FPConvert(16, lhs);
+ rhs = v.ir.FPConvert(16, rhs);
+ }
+ v.X(hadd2.dest_reg, MergeResult(v.ir, hadd2.dest_reg, lhs, rhs, merge));
+}
+
+void HADD2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_b, bool neg_b, Swizzle swizzle_b,
+ const IR::U32& src_b) {
+ union {
+ u64 raw;
+ BitField<49, 2, Merge> merge;
+ BitField<39, 1, u64> ftz;
+ BitField<43, 1, u64> neg_a;
+ BitField<44, 1, u64> abs_a;
+ BitField<47, 2, Swizzle> swizzle_a;
+ } const hadd2{insn};
+
+ HADD2(v, insn, hadd2.merge, hadd2.ftz != 0, sat, hadd2.abs_a != 0, hadd2.neg_a != 0,
+ hadd2.swizzle_a, abs_b, neg_b, swizzle_b, src_b);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::HADD2_reg(u64 insn) {
+ union {
+ u64 raw;
+ BitField<32, 1, u64> sat;
+ BitField<31, 1, u64> neg_b;
+ BitField<30, 1, u64> abs_b;
+ BitField<28, 2, Swizzle> swizzle_b;
+ } const hadd2{insn};
+
+ HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, hadd2.swizzle_b,
+ GetReg20(insn));
+}
+
+void TranslatorVisitor::HADD2_cbuf(u64 insn) {
+ union {
+ u64 raw;
+ BitField<52, 1, u64> sat;
+ BitField<56, 1, u64> neg_b;
+ BitField<54, 1, u64> abs_b;
+ } const hadd2{insn};
+
+ HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, Swizzle::F32,
+ GetCbuf(insn));
+}
+
+void TranslatorVisitor::HADD2_imm(u64 insn) {
+ union {
+ u64 raw;
+ BitField<52, 1, u64> sat;
+ BitField<56, 1, u64> neg_high;
+ BitField<30, 9, u64> high;
+ BitField<29, 1, u64> neg_low;
+ BitField<20, 9, u64> low;
+ } const hadd2{insn};
+
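+    // Each 9-bit half immediate supplies the exponent and top mantissa bits of an fp16 value
+    // (bits [14:6] of its half), with the sign placed at bit 15 of that half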
+ const u32 imm{
+ static_cast<u32>(hadd2.low << 6) | static_cast<u32>((hadd2.neg_low != 0 ? 1 : 0) << 15) |
+ static_cast<u32>(hadd2.high << 22) | static_cast<u32>((hadd2.neg_high != 0 ? 1 : 0) << 31)};
+ HADD2(*this, insn, hadd2.sat != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm));
+}
+
+void TranslatorVisitor::HADD2_32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<55, 1, u64> ftz;
+ BitField<52, 1, u64> sat;
+ BitField<56, 1, u64> neg_a;
+ BitField<53, 2, Swizzle> swizzle_a;
+ BitField<20, 32, u64> imm32;
+ } const hadd2{insn};
+
+ const u32 imm{static_cast<u32>(hadd2.imm32)};
+ HADD2(*this, insn, Merge::H1_H0, hadd2.ftz != 0, hadd2.sat != 0, false, hadd2.neg_a != 0,
+ hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm));
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
new file mode 100644
index 000000000..fd7986701
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
@@ -0,0 +1,169 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c,
+ Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c,
+ bool sat, HalfPrecision precision) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a;
+ } const hfma2{insn};
+
+ auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hfma2.src_a), swizzle_a)};
+ auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
+ auto [lhs_c, rhs_c]{Extract(v.ir, src_c, swizzle_c)};
+ const bool promotion{lhs_a.Type() != lhs_b.Type() || lhs_a.Type() != lhs_c.Type()};
+ if (promotion) {
+ if (lhs_a.Type() == IR::Type::F16) {
+ lhs_a = v.ir.FPConvert(32, lhs_a);
+ rhs_a = v.ir.FPConvert(32, rhs_a);
+ }
+ if (lhs_b.Type() == IR::Type::F16) {
+ lhs_b = v.ir.FPConvert(32, lhs_b);
+ rhs_b = v.ir.FPConvert(32, rhs_b);
+ }
+ if (lhs_c.Type() == IR::Type::F16) {
+ lhs_c = v.ir.FPConvert(32, lhs_c);
+ rhs_c = v.ir.FPConvert(32, rhs_c);
+ }
+ }
+
+ lhs_b = v.ir.FPAbsNeg(lhs_b, false, neg_b);
+ rhs_b = v.ir.FPAbsNeg(rhs_b, false, neg_b);
+
+ lhs_c = v.ir.FPAbsNeg(lhs_c, false, neg_c);
+ rhs_c = v.ir.FPAbsNeg(rhs_c, false, neg_c);
+
+ const IR::FpControl fp_control{
+ .no_contraction = true,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = HalfPrecision2FmzMode(precision),
+ };
+ IR::F16F32F64 lhs{v.ir.FPFma(lhs_a, lhs_b, lhs_c, fp_control)};
+ IR::F16F32F64 rhs{v.ir.FPFma(rhs_a, rhs_b, rhs_c, fp_control)};
+ if (precision == HalfPrecision::FMZ && !sat) {
+        // Do not implement FMZ when SAT is enabled, as saturation already handles it.
+        // In D3D9 mode, anything * 0 is zero, even NaN and infinity
+ const IR::F32 zero{v.ir.Imm32(0.0f)};
+ const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)};
+ const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)};
+ const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)};
+ lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, lhs_c, lhs)};
+
+ const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)};
+ const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)};
+ const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)};
+ rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, rhs_c, rhs)};
+ }
+ if (sat) {
+ lhs = v.ir.FPSaturate(lhs);
+ rhs = v.ir.FPSaturate(rhs);
+ }
+ if (promotion) {
+ lhs = v.ir.FPConvert(16, lhs);
+ rhs = v.ir.FPConvert(16, rhs);
+ }
+ v.X(hfma2.dest_reg, MergeResult(v.ir, hfma2.dest_reg, lhs, rhs, merge));
+}
+
+void HFMA2(TranslatorVisitor& v, u64 insn, bool neg_b, bool neg_c, Swizzle swizzle_b,
+ Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, bool sat,
+ HalfPrecision precision) {
+ union {
+ u64 raw;
+ BitField<47, 2, Swizzle> swizzle_a;
+ BitField<49, 2, Merge> merge;
+ } const hfma2{insn};
+
+ HFMA2(v, insn, hfma2.merge, hfma2.swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b, src_c,
+ sat, precision);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::HFMA2_reg(u64 insn) {
+ union {
+ u64 raw;
+ BitField<28, 2, Swizzle> swizzle_b;
+ BitField<32, 1, u64> saturate;
+ BitField<31, 1, u64> neg_b;
+ BitField<30, 1, u64> neg_c;
+ BitField<35, 2, Swizzle> swizzle_c;
+ BitField<37, 2, HalfPrecision> precision;
+ } const hfma2{insn};
+
+ HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, hfma2.swizzle_c,
+ GetReg20(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
+}
+
+void TranslatorVisitor::HFMA2_rc(u64 insn) {
+ union {
+ u64 raw;
+ BitField<51, 1, u64> neg_c;
+ BitField<52, 1, u64> saturate;
+ BitField<53, 2, Swizzle> swizzle_b;
+ BitField<56, 1, u64> neg_b;
+ BitField<57, 2, HalfPrecision> precision;
+ } const hfma2{insn};
+
+ HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, Swizzle::F32,
+ GetReg39(insn), GetCbuf(insn), hfma2.saturate != 0, hfma2.precision);
+}
+
+void TranslatorVisitor::HFMA2_cr(u64 insn) {
+ union {
+ u64 raw;
+ BitField<51, 1, u64> neg_c;
+ BitField<52, 1, u64> saturate;
+ BitField<53, 2, Swizzle> swizzle_c;
+ BitField<56, 1, u64> neg_b;
+ BitField<57, 2, HalfPrecision> precision;
+ } const hfma2{insn};
+
+ HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, Swizzle::F32, hfma2.swizzle_c,
+ GetCbuf(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
+}
+
+void TranslatorVisitor::HFMA2_imm(u64 insn) {
+ union {
+ u64 raw;
+ BitField<51, 1, u64> neg_c;
+ BitField<52, 1, u64> saturate;
+ BitField<53, 2, Swizzle> swizzle_c;
+
+ BitField<56, 1, u64> neg_high;
+ BitField<30, 9, u64> high;
+ BitField<29, 1, u64> neg_low;
+ BitField<20, 9, u64> low;
+ BitField<57, 2, HalfPrecision> precision;
+ } const hfma2{insn};
+
+ const u32 imm{
+ static_cast<u32>(hfma2.low << 6) | static_cast<u32>((hfma2.neg_low != 0 ? 1 : 0) << 15) |
+ static_cast<u32>(hfma2.high << 22) | static_cast<u32>((hfma2.neg_high != 0 ? 1 : 0) << 31)};
+
+ HFMA2(*this, insn, false, hfma2.neg_c != 0, Swizzle::H1_H0, hfma2.swizzle_c, ir.Imm32(imm),
+ GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
+}
+
+void TranslatorVisitor::HFMA2_32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> src_c;
+ BitField<20, 32, u64> imm32;
+ BitField<52, 1, u64> neg_c;
+ BitField<53, 2, Swizzle> swizzle_a;
+ BitField<55, 2, HalfPrecision> precision;
+ } const hfma2{insn};
+
+ const u32 imm{static_cast<u32>(hfma2.imm32)};
+ HFMA2(*this, insn, Merge::H1_H0, hfma2.swizzle_a, false, hfma2.neg_c != 0, Swizzle::H1_H0,
+ Swizzle::H1_H0, ir.Imm32(imm), X(hfma2.src_c), false, hfma2.precision);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp
new file mode 100644
index 000000000..0dbeb7f56
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp
@@ -0,0 +1,62 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
+
+namespace Shader::Maxwell {
+
+IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision) {
+ switch (precision) {
+ case HalfPrecision::None:
+ return IR::FmzMode::None;
+ case HalfPrecision::FTZ:
+ return IR::FmzMode::FTZ;
+ case HalfPrecision::FMZ:
+ return IR::FmzMode::FMZ;
+ default:
+ return IR::FmzMode::DontCare;
+ }
+}
+
+std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) {
+ switch (swizzle) {
+ case Swizzle::H1_H0: {
+ const IR::Value vector{ir.UnpackFloat2x16(value)};
+ return {IR::F16{ir.CompositeExtract(vector, 0)}, IR::F16{ir.CompositeExtract(vector, 1)}};
+ }
+ case Swizzle::H0_H0: {
+ const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 0)};
+ return {scalar, scalar};
+ }
+ case Swizzle::H1_H1: {
+ const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 1)};
+ return {scalar, scalar};
+ }
+ case Swizzle::F32: {
+ const IR::F32 scalar{ir.BitCast<IR::F32>(value)};
+ return {scalar, scalar};
+ }
+ }
+ throw InvalidArgument("Invalid swizzle {}", swizzle);
+}
+
+IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs,
+ Merge merge) {
+ switch (merge) {
+ case Merge::H1_H0:
+ return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs));
+ case Merge::F32:
+ return ir.BitCast<IR::U32, IR::F32>(ir.FPConvert(32, lhs));
+ case Merge::MRG_H0:
+ case Merge::MRG_H1: {
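+        // MRG keeps the untouched half of the destination register and overwrites only the
+        // selected half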
+ const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))};
+ const bool is_h0{merge == Merge::MRG_H0};
+ const IR::F16 insert{ir.FPConvert(16, is_h0 ? lhs : rhs)};
+ return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, is_h0 ? 0 : 1));
+ }
+ }
+ throw InvalidArgument("Invalid merge {}", merge);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h
new file mode 100644
index 000000000..59da56a7e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h
@@ -0,0 +1,42 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+
+enum class Merge : u64 {
+ H1_H0,
+ F32,
+ MRG_H0,
+ MRG_H1,
+};
+
+enum class Swizzle : u64 {
+ H1_H0,
+ F32,
+ H0_H0,
+ H1_H1,
+};
+
+enum class HalfPrecision : u64 {
+ None = 0,
+ FTZ = 1,
+ FMZ = 2,
+};
+
+IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision);
+
+std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle);
+
+IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs,
+ Merge merge);
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
new file mode 100644
index 000000000..3f548ce76
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
@@ -0,0 +1,143 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a,
+ Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b,
+ HalfPrecision precision) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a;
+ } const hmul2{insn};
+
+ auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hmul2.src_a), swizzle_a)};
+ auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
+ const bool promotion{lhs_a.Type() != lhs_b.Type()};
+ if (promotion) {
+ if (lhs_a.Type() == IR::Type::F16) {
+ lhs_a = v.ir.FPConvert(32, lhs_a);
+ rhs_a = v.ir.FPConvert(32, rhs_a);
+ }
+ if (lhs_b.Type() == IR::Type::F16) {
+ lhs_b = v.ir.FPConvert(32, lhs_b);
+ rhs_b = v.ir.FPConvert(32, rhs_b);
+ }
+ }
+ lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a);
+ rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a);
+
+ lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
+ rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
+
+ const IR::FpControl fp_control{
+ .no_contraction = true,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = HalfPrecision2FmzMode(precision),
+ };
+ IR::F16F32F64 lhs{v.ir.FPMul(lhs_a, lhs_b, fp_control)};
+ IR::F16F32F64 rhs{v.ir.FPMul(rhs_a, rhs_b, fp_control)};
+ if (precision == HalfPrecision::FMZ && !sat) {
+        // Do not implement FMZ when SAT is enabled, as saturation already handles it.
+        // In D3D9 mode, anything * 0 is zero, even NaN and infinity
+ const IR::F32 zero{v.ir.Imm32(0.0f)};
+ const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)};
+ const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)};
+ const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)};
+ lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, zero, lhs)};
+
+ const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)};
+ const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)};
+ const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)};
+ rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, zero, rhs)};
+ }
+ if (sat) {
+ lhs = v.ir.FPSaturate(lhs);
+ rhs = v.ir.FPSaturate(rhs);
+ }
+ if (promotion) {
+ lhs = v.ir.FPConvert(16, lhs);
+ rhs = v.ir.FPConvert(16, rhs);
+ }
+ v.X(hmul2.dest_reg, MergeResult(v.ir, hmul2.dest_reg, lhs, rhs, merge));
+}
+
+void HMUL2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_a, bool neg_a, bool abs_b, bool neg_b,
+ Swizzle swizzle_b, const IR::U32& src_b) {
+ union {
+ u64 raw;
+ BitField<49, 2, Merge> merge;
+ BitField<47, 2, Swizzle> swizzle_a;
+ BitField<39, 2, HalfPrecision> precision;
+ } const hmul2{insn};
+
+ HMUL2(v, insn, hmul2.merge, sat, abs_a, neg_a, hmul2.swizzle_a, abs_b, neg_b, swizzle_b, src_b,
+ hmul2.precision);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::HMUL2_reg(u64 insn) {
+ union {
+ u64 raw;
+ BitField<32, 1, u64> sat;
+ BitField<31, 1, u64> neg_b;
+ BitField<30, 1, u64> abs_b;
+ BitField<44, 1, u64> abs_a;
+ BitField<28, 2, Swizzle> swizzle_b;
+ } const hmul2{insn};
+
+ HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, false, hmul2.abs_b != 0, hmul2.neg_b != 0,
+ hmul2.swizzle_b, GetReg20(insn));
+}
+
+void TranslatorVisitor::HMUL2_cbuf(u64 insn) {
+ union {
+ u64 raw;
+ BitField<52, 1, u64> sat;
+ BitField<54, 1, u64> abs_b;
+ BitField<43, 1, u64> neg_a;
+ BitField<44, 1, u64> abs_a;
+ } const hmul2{insn};
+
+ HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, hmul2.abs_b != 0, false,
+ Swizzle::F32, GetCbuf(insn));
+}
+
+void TranslatorVisitor::HMUL2_imm(u64 insn) {
+ union {
+ u64 raw;
+ BitField<52, 1, u64> sat;
+ BitField<56, 1, u64> neg_high;
+ BitField<30, 9, u64> high;
+ BitField<29, 1, u64> neg_low;
+ BitField<20, 9, u64> low;
+ BitField<43, 1, u64> neg_a;
+ BitField<44, 1, u64> abs_a;
+ } const hmul2{insn};
+
+ const u32 imm{
+ static_cast<u32>(hmul2.low << 6) | static_cast<u32>((hmul2.neg_low != 0 ? 1 : 0) << 15) |
+ static_cast<u32>(hmul2.high << 22) | static_cast<u32>((hmul2.neg_high != 0 ? 1 : 0) << 31)};
+ HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, false, false,
+ Swizzle::H1_H0, ir.Imm32(imm));
+}
+
+void TranslatorVisitor::HMUL2_32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<55, 2, HalfPrecision> precision;
+ BitField<52, 1, u64> sat;
+ BitField<53, 2, Swizzle> swizzle_a;
+ BitField<20, 32, u64> imm32;
+ } const hmul2{insn};
+
+ const u32 imm{static_cast<u32>(hmul2.imm32)};
+ HMUL2(*this, insn, Merge::H1_H0, hmul2.sat != 0, false, false, hmul2.swizzle_a, false, false,
+ Swizzle::H1_H0, ir.Imm32(imm), hmul2.precision);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp
new file mode 100644
index 000000000..cca5b831f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp
@@ -0,0 +1,117 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool ftz, bool neg_b,
+ bool abs_b, FPCompareOp compare_op, Swizzle swizzle_b) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ BitField<43, 1, u64> neg_a;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<44, 1, u64> abs_a;
+ BitField<47, 2, Swizzle> swizzle_a;
+ } const hset2{insn};
+
+ auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hset2.src_a_reg), hset2.swizzle_a)};
+ auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
+
+ if (lhs_a.Type() != lhs_b.Type()) {
+ if (lhs_a.Type() == IR::Type::F16) {
+ lhs_a = v.ir.FPConvert(32, lhs_a);
+ rhs_a = v.ir.FPConvert(32, rhs_a);
+ }
+ if (lhs_b.Type() == IR::Type::F16) {
+ lhs_b = v.ir.FPConvert(32, lhs_b);
+ rhs_b = v.ir.FPConvert(32, rhs_b);
+ }
+ }
+
+ lhs_a = v.ir.FPAbsNeg(lhs_a, hset2.abs_a != 0, hset2.neg_a != 0);
+ rhs_a = v.ir.FPAbsNeg(rhs_a, hset2.abs_a != 0, hset2.neg_a != 0);
+
+ lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
+ rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
+
+ const IR::FpControl control{
+ .no_contraction = false,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+
+ IR::U1 pred{v.ir.GetPred(hset2.pred)};
+ if (hset2.neg_pred != 0) {
+ pred = v.ir.LogicalNot(pred);
+ }
+ const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)};
+ const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)};
+ const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hset2.bop)};
+ const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hset2.bop)};
+
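+    // With .BF a passing comparison writes 1.0 encoded as fp16 (0x3c00) into its half; otherwise
+    // it writes an all-ones 16-bit mask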
+ const u32 true_value = bf ? 0x3c00 : 0xffff;
+ const IR::U32 true_val_lhs{v.ir.Imm32(true_value)};
+ const IR::U32 true_val_rhs{v.ir.Imm32(true_value << 16)};
+ const IR::U32 fail_result{v.ir.Imm32(0)};
+ const IR::U32 result_lhs{v.ir.Select(bop_result_lhs, true_val_lhs, fail_result)};
+ const IR::U32 result_rhs{v.ir.Select(bop_result_rhs, true_val_rhs, fail_result)};
+
+ v.X(hset2.dest_reg, IR::U32{v.ir.BitwiseOr(result_lhs, result_rhs)});
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::HSET2_reg(u64 insn) {
+ union {
+ u64 insn;
+ BitField<30, 1, u64> abs_b;
+ BitField<49, 1, u64> bf;
+ BitField<31, 1, u64> neg_b;
+ BitField<50, 1, u64> ftz;
+ BitField<35, 4, FPCompareOp> compare_op;
+ BitField<28, 2, Swizzle> swizzle_b;
+ } const hset2{insn};
+
+ HSET2(*this, insn, GetReg20(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0,
+ hset2.abs_b != 0, hset2.compare_op, hset2.swizzle_b);
+}
+
+void TranslatorVisitor::HSET2_cbuf(u64 insn) {
+ union {
+ u64 insn;
+ BitField<53, 1, u64> bf;
+ BitField<56, 1, u64> neg_b;
+ BitField<54, 1, u64> ftz;
+ BitField<49, 4, FPCompareOp> compare_op;
+ } const hset2{insn};
+
+ HSET2(*this, insn, GetCbuf(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, false,
+ hset2.compare_op, Swizzle::F32);
+}
+
+void TranslatorVisitor::HSET2_imm(u64 insn) {
+ union {
+ u64 insn;
+ BitField<53, 1, u64> bf;
+ BitField<54, 1, u64> ftz;
+ BitField<49, 4, FPCompareOp> compare_op;
+ BitField<56, 1, u64> neg_high;
+ BitField<30, 9, u64> high;
+ BitField<29, 1, u64> neg_low;
+ BitField<20, 9, u64> low;
+ } const hset2{insn};
+
+ const u32 imm{
+ static_cast<u32>(hset2.low << 6) | static_cast<u32>((hset2.neg_low != 0 ? 1 : 0) << 15) |
+ static_cast<u32>(hset2.high << 22) | static_cast<u32>((hset2.neg_high != 0 ? 1 : 0) << 31)};
+
+ HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op,
+ Swizzle::H1_H0);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp
new file mode 100644
index 000000000..b3931dae3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp
@@ -0,0 +1,118 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bool abs_b,
+ Swizzle swizzle_b, FPCompareOp compare_op, bool h_and) {
+ union {
+ u64 insn;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<3, 3, IR::Pred> dest_pred_a;
+ BitField<0, 3, IR::Pred> dest_pred_b;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ BitField<43, 1, u64> neg_a;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<44, 1, u64> abs_a;
+ BitField<6, 1, u64> ftz;
+ BitField<47, 2, Swizzle> swizzle_a;
+ } const hsetp2{insn};
+
+ auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hsetp2.src_a_reg), hsetp2.swizzle_a)};
+ auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
+
+ if (lhs_a.Type() != lhs_b.Type()) {
+ if (lhs_a.Type() == IR::Type::F16) {
+ lhs_a = v.ir.FPConvert(32, lhs_a);
+ rhs_a = v.ir.FPConvert(32, rhs_a);
+ }
+ if (lhs_b.Type() == IR::Type::F16) {
+ lhs_b = v.ir.FPConvert(32, lhs_b);
+ rhs_b = v.ir.FPConvert(32, rhs_b);
+ }
+ }
+
+ lhs_a = v.ir.FPAbsNeg(lhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0);
+ rhs_a = v.ir.FPAbsNeg(rhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0);
+
+ lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
+ rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
+
+ const IR::FpControl control{
+ .no_contraction = false,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = (hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+
+ IR::U1 pred{v.ir.GetPred(hsetp2.pred)};
+ if (hsetp2.neg_pred != 0) {
+ pred = v.ir.LogicalNot(pred);
+ }
+ const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)};
+ const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)};
+ const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hsetp2.bop)};
+ const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hsetp2.bop)};
+
+ if (h_and) {
+ auto result = v.ir.LogicalAnd(bop_result_lhs, bop_result_rhs);
+ v.ir.SetPred(hsetp2.dest_pred_a, result);
+ v.ir.SetPred(hsetp2.dest_pred_b, v.ir.LogicalNot(result));
+ } else {
+ v.ir.SetPred(hsetp2.dest_pred_a, bop_result_lhs);
+ v.ir.SetPred(hsetp2.dest_pred_b, bop_result_rhs);
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::HSETP2_reg(u64 insn) {
+ union {
+ u64 insn;
+ BitField<30, 1, u64> abs_b;
+ BitField<49, 1, u64> h_and;
+ BitField<31, 1, u64> neg_b;
+ BitField<35, 4, FPCompareOp> compare_op;
+ BitField<28, 2, Swizzle> swizzle_b;
+ } const hsetp2{insn};
+ HSETP2(*this, insn, GetReg20(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, hsetp2.swizzle_b,
+ hsetp2.compare_op, hsetp2.h_and != 0);
+}
+
+void TranslatorVisitor::HSETP2_cbuf(u64 insn) {
+ union {
+ u64 insn;
+ BitField<53, 1, u64> h_and;
+ BitField<54, 1, u64> abs_b;
+ BitField<56, 1, u64> neg_b;
+ BitField<49, 4, FPCompareOp> compare_op;
+ } const hsetp2{insn};
+
+ HSETP2(*this, insn, GetCbuf(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, Swizzle::F32,
+ hsetp2.compare_op, hsetp2.h_and != 0);
+}
+
+void TranslatorVisitor::HSETP2_imm(u64 insn) {
+ union {
+ u64 insn;
+ BitField<53, 1, u64> h_and;
+ BitField<54, 1, u64> ftz;
+ BitField<49, 4, FPCompareOp> compare_op;
+ BitField<56, 1, u64> neg_high;
+ BitField<30, 9, u64> high;
+ BitField<29, 1, u64> neg_low;
+ BitField<20, 9, u64> low;
+ } const hsetp2{insn};
+
+ const u32 imm{static_cast<u32>(hsetp2.low << 6) |
+ static_cast<u32>((hsetp2.neg_low != 0 ? 1 : 0) << 15) |
+ static_cast<u32>(hsetp2.high << 22) |
+ static_cast<u32>((hsetp2.neg_high != 0 ? 1 : 0) << 31)};
+
+ HSETP2(*this, insn, ir.Imm32(imm), false, false, Swizzle::H1_H0, hsetp2.compare_op,
+ hsetp2.h_and != 0);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
new file mode 100644
index 000000000..b446aae0e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
@@ -0,0 +1,272 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+[[nodiscard]] IR::U32 CbufLowerBits(IR::IREmitter& ir, bool unaligned, const IR::U32& binding,
+ u32 offset) {
+ if (unaligned) {
+ return ir.Imm32(0);
+ }
+ return ir.GetCbuf(binding, IR::U32{IR::Value{offset}});
+}
+} // Anonymous namespace
+
+IR::U32 TranslatorVisitor::X(IR::Reg reg) {
+ return ir.GetReg(reg);
+}
+
+IR::U64 TranslatorVisitor::L(IR::Reg reg) {
+ if (!IR::IsAligned(reg, 2)) {
+ throw NotImplementedException("Unaligned source register {}", reg);
+ }
+ return IR::U64{ir.PackUint2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))};
+}
+
+IR::F32 TranslatorVisitor::F(IR::Reg reg) {
+ return ir.BitCast<IR::F32>(X(reg));
+}
+
+IR::F64 TranslatorVisitor::D(IR::Reg reg) {
+ if (!IR::IsAligned(reg, 2)) {
+ throw NotImplementedException("Unaligned source register {}", reg);
+ }
+ return IR::F64{ir.PackDouble2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))};
+}
+
+void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) {
+ ir.SetReg(dest_reg, value);
+}
+
+void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) {
+ if (!IR::IsAligned(dest_reg, 2)) {
+ throw NotImplementedException("Unaligned destination register {}", dest_reg);
+ }
+ const IR::Value result{ir.UnpackUint2x32(value)};
+ for (int i = 0; i < 2; i++) {
+ X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
+ }
+}
+
+void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) {
+ X(dest_reg, ir.BitCast<IR::U32>(value));
+}
+
+void TranslatorVisitor::D(IR::Reg dest_reg, const IR::F64& value) {
+ if (!IR::IsAligned(dest_reg, 2)) {
+ throw NotImplementedException("Unaligned destination register {}", dest_reg);
+ }
+ const IR::Value result{ir.UnpackDouble2x32(value)};
+ for (int i = 0; i < 2; i++) {
+ X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
+ }
+}
+
+IR::U32 TranslatorVisitor::GetReg8(u64 insn) {
+ union {
+ u64 raw;
+ BitField<8, 8, IR::Reg> index;
+ } const reg{insn};
+ return X(reg.index);
+}
+
+IR::U32 TranslatorVisitor::GetReg20(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 8, IR::Reg> index;
+ } const reg{insn};
+ return X(reg.index);
+}
+
+IR::U32 TranslatorVisitor::GetReg39(u64 insn) {
+ union {
+ u64 raw;
+ BitField<39, 8, IR::Reg> index;
+ } const reg{insn};
+ return X(reg.index);
+}
+
+IR::F32 TranslatorVisitor::GetFloatReg8(u64 insn) {
+ return ir.BitCast<IR::F32>(GetReg8(insn));
+}
+
+IR::F32 TranslatorVisitor::GetFloatReg20(u64 insn) {
+ return ir.BitCast<IR::F32>(GetReg20(insn));
+}
+
+IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) {
+ return ir.BitCast<IR::F32>(GetReg39(insn));
+}
+
+IR::F64 TranslatorVisitor::GetDoubleReg20(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 8, IR::Reg> index;
+ } const reg{insn};
+ return D(reg.index);
+}
+
+IR::F64 TranslatorVisitor::GetDoubleReg39(u64 insn) {
+ union {
+ u64 raw;
+ BitField<39, 8, IR::Reg> index;
+ } const reg{insn};
+ return D(reg.index);
+}
+
+static std::pair<IR::U32, IR::U32> CbufAddr(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 14, u64> offset;
+ BitField<34, 5, u64> binding;
+ } const cbuf{insn};
+
+ if (cbuf.binding >= 18) {
+ throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding);
+ }
+ if (cbuf.offset >= 0x10'000) {
+ throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset);
+ }
+ const IR::Value binding{static_cast<u32>(cbuf.binding)};
+ const IR::Value byte_offset{static_cast<u32>(cbuf.offset) * 4};
+ return {IR::U32{binding}, IR::U32{byte_offset}};
+}
+
+IR::U32 TranslatorVisitor::GetCbuf(u64 insn) {
+ const auto [binding, byte_offset]{CbufAddr(insn)};
+ return ir.GetCbuf(binding, byte_offset);
+}
+
+IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) {
+ const auto [binding, byte_offset]{CbufAddr(insn)};
+ return ir.GetFloatCbuf(binding, byte_offset);
+}
+
+IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 1, u64> unaligned;
+ } const cbuf{insn};
+
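+    // An aligned access reads both words of the 8-byte pair; an unaligned access reads only the
+    // high word and zero-fills the low 32 bits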
+ const auto [binding, offset_value]{CbufAddr(insn)};
+ const bool unaligned{cbuf.unaligned != 0};
+ const u32 offset{offset_value.U32()};
+ const IR::Value addr{unaligned ? offset | 4u : (offset & ~7u) | 4u};
+
+ const IR::U32 value{ir.GetCbuf(binding, IR::U32{addr})};
+ const IR::U32 lower_bits{CbufLowerBits(ir, unaligned, binding, offset)};
+ return ir.PackDouble2x32(ir.CompositeConstruct(lower_bits, value));
+}
+
+IR::U64 TranslatorVisitor::GetPackedCbuf(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 1, u64> unaligned;
+ } const cbuf{insn};
+
+ if (cbuf.unaligned != 0) {
+ throw NotImplementedException("Unaligned packed constant buffer read");
+ }
+ const auto [binding, lower_offset]{CbufAddr(insn)};
+ const IR::U32 upper_offset{ir.Imm32(lower_offset.U32() + 4)};
+ const IR::U32 lower_value{ir.GetCbuf(binding, lower_offset)};
+ const IR::U32 upper_value{ir.GetCbuf(binding, upper_offset)};
+ return ir.PackUint2x32(ir.CompositeConstruct(lower_value, upper_value));
+}
+
+IR::U32 TranslatorVisitor::GetImm20(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 19, u64> value;
+ BitField<56, 1, u64> is_negative;
+ } const imm{insn};
+
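+    // The 19-bit field and the sign bit at position 56 form a 20-bit two's complement immediate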
+ if (imm.is_negative != 0) {
+ const s64 raw{static_cast<s64>(imm.value)};
+ return ir.Imm32(static_cast<s32>(-(1LL << 19) + raw));
+ } else {
+ return ir.Imm32(static_cast<u32>(imm.value));
+ }
+}
+
+IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 19, u64> value;
+ BitField<56, 1, u64> is_negative;
+ } const imm{insn};
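+    // The 19-bit immediate encodes the upper bits (exponent and high mantissa) of an IEEE-754
+    // single; shift it into place and OR in the sign bit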
+ const u32 sign_bit{static_cast<u32>(imm.is_negative != 0 ? (1ULL << 31) : 0)};
+ const u32 value{static_cast<u32>(imm.value) << 12};
+ return ir.Imm32(Common::BitCast<f32>(value | sign_bit));
+}
+
+IR::F64 TranslatorVisitor::GetDoubleImm20(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 19, u64> value;
+ BitField<56, 1, u64> is_negative;
+ } const imm{insn};
+ const u64 sign_bit{imm.is_negative != 0 ? (1ULL << 63) : 0};
+ const u64 value{imm.value << 44};
+ return ir.Imm64(Common::BitCast<f64>(value | sign_bit));
+}
+
+IR::U64 TranslatorVisitor::GetPackedImm20(u64 insn) {
+ const s64 value{GetImm20(insn).U32()};
+ return ir.Imm64(static_cast<u64>(static_cast<s64>(value) << 32));
+}
+
+IR::U32 TranslatorVisitor::GetImm32(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 32, u64> value;
+ } const imm{insn};
+ return ir.Imm32(static_cast<u32>(imm.value));
+}
+
+IR::F32 TranslatorVisitor::GetFloatImm32(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 32, u64> value;
+ } const imm{insn};
+ return ir.Imm32(Common::BitCast<f32>(static_cast<u32>(imm.value)));
+}
+
+void TranslatorVisitor::SetZFlag(const IR::U1& value) {
+ ir.SetZFlag(value);
+}
+
+void TranslatorVisitor::SetSFlag(const IR::U1& value) {
+ ir.SetSFlag(value);
+}
+
+void TranslatorVisitor::SetCFlag(const IR::U1& value) {
+ ir.SetCFlag(value);
+}
+
+void TranslatorVisitor::SetOFlag(const IR::U1& value) {
+ ir.SetOFlag(value);
+}
+
+void TranslatorVisitor::ResetZero() {
+ SetZFlag(ir.Imm1(false));
+}
+
+void TranslatorVisitor::ResetSFlag() {
+ SetSFlag(ir.Imm1(false));
+}
+
+void TranslatorVisitor::ResetCFlag() {
+ SetCFlag(ir.Imm1(false));
+}
+
+void TranslatorVisitor::ResetOFlag() {
+ SetOFlag(ir.Imm1(false));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
new file mode 100644
index 000000000..335e4f24f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -0,0 +1,387 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/maxwell/instruction.h"
+
+namespace Shader::Maxwell {
+
+enum class CompareOp : u64 {
+ False,
+ LessThan,
+ Equal,
+ LessThanEqual,
+ GreaterThan,
+ NotEqual,
+ GreaterThanEqual,
+ True,
+};
+
+enum class BooleanOp : u64 {
+ AND,
+ OR,
+ XOR,
+};
+
+enum class PredicateOp : u64 {
+ False,
+ True,
+ Zero,
+ NonZero,
+};
+
+enum class FPCompareOp : u64 {
+ F,
+ LT,
+ EQ,
+ LE,
+ GT,
+ NE,
+ GE,
+ NUM,
+ Nan,
+ LTU,
+ EQU,
+ LEU,
+ GTU,
+ NEU,
+ GEU,
+ T,
+};
+
+class TranslatorVisitor {
+public:
+ explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {}
+
+ Environment& env;
+ IR::IREmitter ir;
+
+ void AL2P(u64 insn);
+ void ALD(u64 insn);
+ void AST(u64 insn);
+ void ATOM_cas(u64 insn);
+ void ATOM(u64 insn);
+ void ATOMS_cas(u64 insn);
+ void ATOMS(u64 insn);
+ void B2R(u64 insn);
+ void BAR(u64 insn);
+ void BFE_reg(u64 insn);
+ void BFE_cbuf(u64 insn);
+ void BFE_imm(u64 insn);
+ void BFI_reg(u64 insn);
+ void BFI_rc(u64 insn);
+ void BFI_cr(u64 insn);
+ void BFI_imm(u64 insn);
+ void BPT(u64 insn);
+ void BRA(u64 insn);
+ void BRK(u64 insn);
+ void BRX(u64 insn);
+ void CAL();
+ void CCTL(u64 insn);
+ void CCTLL(u64 insn);
+ void CONT(u64 insn);
+ void CS2R(u64 insn);
+ void CSET(u64 insn);
+ void CSETP(u64 insn);
+ void DADD_reg(u64 insn);
+ void DADD_cbuf(u64 insn);
+ void DADD_imm(u64 insn);
+ void DEPBAR();
+ void DFMA_reg(u64 insn);
+ void DFMA_rc(u64 insn);
+ void DFMA_cr(u64 insn);
+ void DFMA_imm(u64 insn);
+ void DMNMX_reg(u64 insn);
+ void DMNMX_cbuf(u64 insn);
+ void DMNMX_imm(u64 insn);
+ void DMUL_reg(u64 insn);
+ void DMUL_cbuf(u64 insn);
+ void DMUL_imm(u64 insn);
+ void DSET_reg(u64 insn);
+ void DSET_cbuf(u64 insn);
+ void DSET_imm(u64 insn);
+ void DSETP_reg(u64 insn);
+ void DSETP_cbuf(u64 insn);
+ void DSETP_imm(u64 insn);
+ void EXIT();
+ void F2F_reg(u64 insn);
+ void F2F_cbuf(u64 insn);
+ void F2F_imm(u64 insn);
+ void F2I_reg(u64 insn);
+ void F2I_cbuf(u64 insn);
+ void F2I_imm(u64 insn);
+ void FADD_reg(u64 insn);
+ void FADD_cbuf(u64 insn);
+ void FADD_imm(u64 insn);
+ void FADD32I(u64 insn);
+ void FCHK_reg(u64 insn);
+ void FCHK_cbuf(u64 insn);
+ void FCHK_imm(u64 insn);
+ void FCMP_reg(u64 insn);
+ void FCMP_rc(u64 insn);
+ void FCMP_cr(u64 insn);
+ void FCMP_imm(u64 insn);
+ void FFMA_reg(u64 insn);
+ void FFMA_rc(u64 insn);
+ void FFMA_cr(u64 insn);
+ void FFMA_imm(u64 insn);
+ void FFMA32I(u64 insn);
+ void FLO_reg(u64 insn);
+ void FLO_cbuf(u64 insn);
+ void FLO_imm(u64 insn);
+ void FMNMX_reg(u64 insn);
+ void FMNMX_cbuf(u64 insn);
+ void FMNMX_imm(u64 insn);
+ void FMUL_reg(u64 insn);
+ void FMUL_cbuf(u64 insn);
+ void FMUL_imm(u64 insn);
+ void FMUL32I(u64 insn);
+ void FSET_reg(u64 insn);
+ void FSET_cbuf(u64 insn);
+ void FSET_imm(u64 insn);
+ void FSETP_reg(u64 insn);
+ void FSETP_cbuf(u64 insn);
+ void FSETP_imm(u64 insn);
+ void FSWZADD(u64 insn);
+ void GETCRSPTR(u64 insn);
+ void GETLMEMBASE(u64 insn);
+ void HADD2_reg(u64 insn);
+ void HADD2_cbuf(u64 insn);
+ void HADD2_imm(u64 insn);
+ void HADD2_32I(u64 insn);
+ void HFMA2_reg(u64 insn);
+ void HFMA2_rc(u64 insn);
+ void HFMA2_cr(u64 insn);
+ void HFMA2_imm(u64 insn);
+ void HFMA2_32I(u64 insn);
+ void HMUL2_reg(u64 insn);
+ void HMUL2_cbuf(u64 insn);
+ void HMUL2_imm(u64 insn);
+ void HMUL2_32I(u64 insn);
+ void HSET2_reg(u64 insn);
+ void HSET2_cbuf(u64 insn);
+ void HSET2_imm(u64 insn);
+ void HSETP2_reg(u64 insn);
+ void HSETP2_cbuf(u64 insn);
+ void HSETP2_imm(u64 insn);
+ void I2F_reg(u64 insn);
+ void I2F_cbuf(u64 insn);
+ void I2F_imm(u64 insn);
+ void I2I_reg(u64 insn);
+ void I2I_cbuf(u64 insn);
+ void I2I_imm(u64 insn);
+ void IADD_reg(u64 insn);
+ void IADD_cbuf(u64 insn);
+ void IADD_imm(u64 insn);
+ void IADD3_reg(u64 insn);
+ void IADD3_cbuf(u64 insn);
+ void IADD3_imm(u64 insn);
+ void IADD32I(u64 insn);
+ void ICMP_reg(u64 insn);
+ void ICMP_rc(u64 insn);
+ void ICMP_cr(u64 insn);
+ void ICMP_imm(u64 insn);
+ void IDE(u64 insn);
+ void IDP_reg(u64 insn);
+ void IDP_imm(u64 insn);
+ void IMAD_reg(u64 insn);
+ void IMAD_rc(u64 insn);
+ void IMAD_cr(u64 insn);
+ void IMAD_imm(u64 insn);
+ void IMAD32I(u64 insn);
+ void IMADSP_reg(u64 insn);
+ void IMADSP_rc(u64 insn);
+ void IMADSP_cr(u64 insn);
+ void IMADSP_imm(u64 insn);
+ void IMNMX_reg(u64 insn);
+ void IMNMX_cbuf(u64 insn);
+ void IMNMX_imm(u64 insn);
+ void IMUL_reg(u64 insn);
+ void IMUL_cbuf(u64 insn);
+ void IMUL_imm(u64 insn);
+ void IMUL32I(u64 insn);
+ void IPA(u64 insn);
+ void ISBERD(u64 insn);
+ void ISCADD_reg(u64 insn);
+ void ISCADD_cbuf(u64 insn);
+ void ISCADD_imm(u64 insn);
+ void ISCADD32I(u64 insn);
+ void ISET_reg(u64 insn);
+ void ISET_cbuf(u64 insn);
+ void ISET_imm(u64 insn);
+ void ISETP_reg(u64 insn);
+ void ISETP_cbuf(u64 insn);
+ void ISETP_imm(u64 insn);
+ void JCAL(u64 insn);
+ void JMP(u64 insn);
+ void JMX(u64 insn);
+ void KIL();
+ void LD(u64 insn);
+ void LDC(u64 insn);
+ void LDG(u64 insn);
+ void LDL(u64 insn);
+ void LDS(u64 insn);
+ void LEA_hi_reg(u64 insn);
+ void LEA_hi_cbuf(u64 insn);
+ void LEA_lo_reg(u64 insn);
+ void LEA_lo_cbuf(u64 insn);
+ void LEA_lo_imm(u64 insn);
+ void LEPC(u64 insn);
+ void LONGJMP(u64 insn);
+ void LOP_reg(u64 insn);
+ void LOP_cbuf(u64 insn);
+ void LOP_imm(u64 insn);
+ void LOP3_reg(u64 insn);
+ void LOP3_cbuf(u64 insn);
+ void LOP3_imm(u64 insn);
+ void LOP32I(u64 insn);
+ void MEMBAR(u64 insn);
+ void MOV_reg(u64 insn);
+ void MOV_cbuf(u64 insn);
+ void MOV_imm(u64 insn);
+ void MOV32I(u64 insn);
+ void MUFU(u64 insn);
+ void NOP(u64 insn);
+ void OUT_reg(u64 insn);
+ void OUT_cbuf(u64 insn);
+ void OUT_imm(u64 insn);
+ void P2R_reg(u64 insn);
+ void P2R_cbuf(u64 insn);
+ void P2R_imm(u64 insn);
+ void PBK();
+ void PCNT();
+ void PEXIT(u64 insn);
+ void PIXLD(u64 insn);
+ void PLONGJMP(u64 insn);
+ void POPC_reg(u64 insn);
+ void POPC_cbuf(u64 insn);
+ void POPC_imm(u64 insn);
+ void PRET(u64 insn);
+ void PRMT_reg(u64 insn);
+ void PRMT_rc(u64 insn);
+ void PRMT_cr(u64 insn);
+ void PRMT_imm(u64 insn);
+ void PSET(u64 insn);
+ void PSETP(u64 insn);
+ void R2B(u64 insn);
+ void R2P_reg(u64 insn);
+ void R2P_cbuf(u64 insn);
+ void R2P_imm(u64 insn);
+ void RAM(u64 insn);
+ void RED(u64 insn);
+ void RET(u64 insn);
+ void RRO_reg(u64 insn);
+ void RRO_cbuf(u64 insn);
+ void RRO_imm(u64 insn);
+ void RTT(u64 insn);
+ void S2R(u64 insn);
+ void SAM(u64 insn);
+ void SEL_reg(u64 insn);
+ void SEL_cbuf(u64 insn);
+ void SEL_imm(u64 insn);
+ void SETCRSPTR(u64 insn);
+ void SETLMEMBASE(u64 insn);
+ void SHF_l_reg(u64 insn);
+ void SHF_l_imm(u64 insn);
+ void SHF_r_reg(u64 insn);
+ void SHF_r_imm(u64 insn);
+ void SHFL(u64 insn);
+ void SHL_reg(u64 insn);
+ void SHL_cbuf(u64 insn);
+ void SHL_imm(u64 insn);
+ void SHR_reg(u64 insn);
+ void SHR_cbuf(u64 insn);
+ void SHR_imm(u64 insn);
+ void SSY();
+ void ST(u64 insn);
+ void STG(u64 insn);
+ void STL(u64 insn);
+ void STP(u64 insn);
+ void STS(u64 insn);
+ void SUATOM(u64 insn);
+ void SUATOM_cas(u64 insn);
+ void SULD(u64 insn);
+ void SURED(u64 insn);
+ void SUST(u64 insn);
+ void SYNC(u64 insn);
+ void TEX(u64 insn);
+ void TEX_b(u64 insn);
+ void TEXS(u64 insn);
+ void TLD(u64 insn);
+ void TLD_b(u64 insn);
+ void TLD4(u64 insn);
+ void TLD4_b(u64 insn);
+ void TLD4S(u64 insn);
+ void TLDS(u64 insn);
+ void TMML(u64 insn);
+ void TMML_b(u64 insn);
+ void TXA(u64 insn);
+ void TXD(u64 insn);
+ void TXD_b(u64 insn);
+ void TXQ(u64 insn);
+ void TXQ_b(u64 insn);
+ void VABSDIFF(u64 insn);
+ void VABSDIFF4(u64 insn);
+ void VADD(u64 insn);
+ void VMAD(u64 insn);
+ void VMNMX(u64 insn);
+ void VOTE(u64 insn);
+ void VOTE_vtg(u64 insn);
+ void VSET(u64 insn);
+ void VSETP(u64 insn);
+ void VSHL(u64 insn);
+ void VSHR(u64 insn);
+ void XMAD_reg(u64 insn);
+ void XMAD_rc(u64 insn);
+ void XMAD_cr(u64 insn);
+ void XMAD_imm(u64 insn);
+
+ [[nodiscard]] IR::U32 X(IR::Reg reg);
+ [[nodiscard]] IR::U64 L(IR::Reg reg);
+ [[nodiscard]] IR::F32 F(IR::Reg reg);
+ [[nodiscard]] IR::F64 D(IR::Reg reg);
+
+ void X(IR::Reg dest_reg, const IR::U32& value);
+ void L(IR::Reg dest_reg, const IR::U64& value);
+ void F(IR::Reg dest_reg, const IR::F32& value);
+ void D(IR::Reg dest_reg, const IR::F64& value);
+
+ [[nodiscard]] IR::U32 GetReg8(u64 insn);
+ [[nodiscard]] IR::U32 GetReg20(u64 insn);
+ [[nodiscard]] IR::U32 GetReg39(u64 insn);
+ [[nodiscard]] IR::F32 GetFloatReg8(u64 insn);
+ [[nodiscard]] IR::F32 GetFloatReg20(u64 insn);
+ [[nodiscard]] IR::F32 GetFloatReg39(u64 insn);
+ [[nodiscard]] IR::F64 GetDoubleReg20(u64 insn);
+ [[nodiscard]] IR::F64 GetDoubleReg39(u64 insn);
+
+ [[nodiscard]] IR::U32 GetCbuf(u64 insn);
+ [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn);
+ [[nodiscard]] IR::F64 GetDoubleCbuf(u64 insn);
+ [[nodiscard]] IR::U64 GetPackedCbuf(u64 insn);
+
+ [[nodiscard]] IR::U32 GetImm20(u64 insn);
+ [[nodiscard]] IR::F32 GetFloatImm20(u64 insn);
+ [[nodiscard]] IR::F64 GetDoubleImm20(u64 insn);
+ [[nodiscard]] IR::U64 GetPackedImm20(u64 insn);
+
+ [[nodiscard]] IR::U32 GetImm32(u64 insn);
+ [[nodiscard]] IR::F32 GetFloatImm32(u64 insn);
+
+ void SetZFlag(const IR::U1& value);
+ void SetSFlag(const IR::U1& value);
+ void SetCFlag(const IR::U1& value);
+ void SetOFlag(const IR::U1& value);
+
+ void ResetZero();
+ void ResetSFlag();
+ void ResetCFlag();
+ void ResetOFlag();
+};
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
new file mode 100644
index 000000000..8ffd84867
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
@@ -0,0 +1,105 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void IADD(TranslatorVisitor& v, u64 insn, const IR::U32 op_b, bool neg_a, bool po, bool sat, bool x,
+ bool cc) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a;
+ } const iadd{insn};
+
+ if (sat) {
+ throw NotImplementedException("IADD SAT");
+ }
+ if (x && po) {
+ throw NotImplementedException("IADD X+PO");
+ }
+    // Operand A is always read from the src_a register field, negated if needed
+ IR::U32 op_a{v.X(iadd.src_a)};
+ if (neg_a) {
+ op_a = v.ir.INeg(op_a);
+ }
+ // Add both operands
+ IR::U32 result{v.ir.IAdd(op_a, op_b)};
+ if (x) {
+ const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))};
+ result = v.ir.IAdd(result, carry);
+ }
+ if (po) {
+ // .PO adds one to the result
+ result = v.ir.IAdd(result, v.ir.Imm32(1));
+ }
+ if (cc) {
+ // Store flags
+ // TODO: Does this grab the result pre-PO or after?
+ if (po) {
+ throw NotImplementedException("IADD CC+PO");
+ }
+ // TODO: How does CC behave when X is set?
+ if (x) {
+ throw NotImplementedException("IADD X+CC");
+ }
+ v.SetZFlag(v.ir.GetZeroFromOp(result));
+ v.SetSFlag(v.ir.GetSignFromOp(result));
+ v.SetCFlag(v.ir.GetCarryFromOp(result));
+ v.SetOFlag(v.ir.GetOverflowFromOp(result));
+ }
+ // Store result
+ v.X(iadd.dest_reg, result);
+}
+
+void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
+ union {
+ u64 insn;
+ BitField<43, 1, u64> x;
+ BitField<47, 1, u64> cc;
+ BitField<48, 2, u64> three_for_po;
+ BitField<48, 1, u64> neg_b;
+ BitField<49, 1, u64> neg_a;
+ BitField<50, 1, u64> sat;
+ } const iadd{insn};
+
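+    // Both negate bits set (0b11) selects the .PO (plus one) form rather than negating the operands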
+ const bool po{iadd.three_for_po == 3};
+ if (!po && iadd.neg_b != 0) {
+ op_b = v.ir.INeg(op_b);
+ }
+    IADD(v, insn, op_b, !po && iadd.neg_a != 0, po, iadd.sat != 0, iadd.x != 0, iadd.cc != 0);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::IADD_reg(u64 insn) {
+ IADD(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::IADD_cbuf(u64 insn) {
+ IADD(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::IADD_imm(u64 insn) {
+ IADD(*this, insn, GetImm20(insn));
+}
+
+void TranslatorVisitor::IADD32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<52, 1, u64> cc;
+ BitField<53, 1, u64> x;
+ BitField<54, 1, u64> sat;
+ BitField<55, 2, u64> three_for_po;
+ BitField<56, 1, u64> neg_a;
+ } const iadd32i{insn};
+
+ const bool po{iadd32i.three_for_po == 3};
+ const bool neg_a{!po && iadd32i.neg_a != 0};
+ IADD(*this, insn, GetImm32(insn), neg_a, po, iadd32i.sat != 0, iadd32i.x != 0, iadd32i.cc != 0);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp
new file mode 100644
index 000000000..040cfc10f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp
@@ -0,0 +1,122 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Shift : u64 {
+ None,
+ Right,
+ Left,
+};
+enum class Half : u64 {
+ All,
+ Lower,
+ Upper,
+};
+
+[[nodiscard]] IR::U32 IntegerHalf(IR::IREmitter& ir, const IR::U32& value, Half half) {
+ constexpr bool is_signed{false};
+ switch (half) {
+ case Half::All:
+ return value;
+ case Half::Lower:
+ return ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(16), is_signed);
+ case Half::Upper:
+ return ir.BitFieldExtract(value, ir.Imm32(16), ir.Imm32(16), is_signed);
+ }
+ throw NotImplementedException("Invalid half");
+}
+
+[[nodiscard]] IR::U32 IntegerShift(IR::IREmitter& ir, const IR::U32& value, Shift shift) {
+ switch (shift) {
+ case Shift::None:
+ return value;
+ case Shift::Right: {
+ // 33-bit RS IADD3 edge case
+ const IR::U1 edge_case{ir.GetCarryFromOp(value)};
+ const IR::U32 shifted{ir.ShiftRightLogical(value, ir.Imm32(16))};
+ return IR::U32{ir.Select(edge_case, ir.IAdd(shifted, ir.Imm32(0x10000)), shifted)};
+ }
+ case Shift::Left:
+ return ir.ShiftLeftLogical(value, ir.Imm32(16));
+ }
+ throw NotImplementedException("Invalid shift");
+}
+
+void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 op_c,
+ Shift shift = Shift::None) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> x;
+ BitField<49, 1, u64> neg_c;
+ BitField<50, 1, u64> neg_b;
+ BitField<51, 1, u64> neg_a;
+    } const iadd3{insn};
+
+ if (iadd3.neg_a != 0) {
+ op_a = v.ir.INeg(op_a);
+ }
+ if (iadd3.neg_b != 0) {
+ op_b = v.ir.INeg(op_b);
+ }
+ if (iadd3.neg_c != 0) {
+ op_c = v.ir.INeg(op_c);
+ }
+ IR::U32 lhs_1{v.ir.IAdd(op_a, op_b)};
+ if (iadd3.x != 0) {
+ // TODO: How does RS behave when X is set?
+ if (shift == Shift::Right) {
+ throw NotImplementedException("IADD3 X+RS");
+ }
+ const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))};
+ lhs_1 = v.ir.IAdd(lhs_1, carry);
+ }
+ const IR::U32 lhs_2{IntegerShift(v.ir, lhs_1, shift)};
+ const IR::U32 result{v.ir.IAdd(lhs_2, op_c)};
+
+ v.X(iadd3.dest_reg, result);
+ if (iadd3.cc != 0) {
+ // TODO: How does CC behave when X is set?
+ if (iadd3.x != 0) {
+ throw NotImplementedException("IADD3 X+CC");
+ }
+ v.SetZFlag(v.ir.GetZeroFromOp(result));
+ v.SetSFlag(v.ir.GetSignFromOp(result));
+ v.SetCFlag(v.ir.GetCarryFromOp(result));
+ const IR::U1 of_1{v.ir.ILessThan(lhs_1, op_a, false)};
+ v.SetOFlag(v.ir.LogicalOr(v.ir.GetOverflowFromOp(result), of_1));
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::IADD3_reg(u64 insn) {
+ union {
+ u64 insn;
+ BitField<37, 2, Shift> shift;
+ BitField<35, 2, Half> half_a;
+ BitField<33, 2, Half> half_b;
+ BitField<31, 2, Half> half_c;
+ } const iadd3{insn};
+
+ const auto op_a{IntegerHalf(ir, GetReg8(insn), iadd3.half_a)};
+ const auto op_b{IntegerHalf(ir, GetReg20(insn), iadd3.half_b)};
+ const auto op_c{IntegerHalf(ir, GetReg39(insn), iadd3.half_c)};
+ IADD3(*this, insn, op_a, op_b, op_c, iadd3.shift);
+}
+
+void TranslatorVisitor::IADD3_cbuf(u64 insn) {
+ IADD3(*this, insn, GetReg8(insn), GetCbuf(insn), GetReg39(insn));
+}
+
+void TranslatorVisitor::IADD3_imm(u64 insn) {
+ IADD3(*this, insn, GetReg8(insn), GetImm20(insn), GetReg39(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp
new file mode 100644
index 000000000..ba6e01926
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp
@@ -0,0 +1,48 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void ICMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& operand) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<48, 1, u64> is_signed;
+ BitField<49, 3, CompareOp> compare_op;
+ } const icmp{insn};
+
+ const IR::U32 zero{v.ir.Imm32(0)};
+ const bool is_signed{icmp.is_signed != 0};
+ const IR::U1 cmp_result{IntegerCompare(v.ir, operand, zero, icmp.compare_op, is_signed)};
+
+ const IR::U32 src_reg{v.X(icmp.src_reg)};
+ const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)};
+
+ v.X(icmp.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::ICMP_reg(u64 insn) {
+ ICMP(*this, insn, GetReg20(insn), GetReg39(insn));
+}
+
+void TranslatorVisitor::ICMP_rc(u64 insn) {
+ ICMP(*this, insn, GetReg39(insn), GetCbuf(insn));
+}
+
+void TranslatorVisitor::ICMP_cr(u64 insn) {
+ ICMP(*this, insn, GetCbuf(insn), GetReg39(insn));
+}
+
+void TranslatorVisitor::ICMP_imm(u64 insn) {
+ ICMP(*this, insn, GetImm20(insn), GetReg39(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp
new file mode 100644
index 000000000..8ce1aee04
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp
@@ -0,0 +1,80 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
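+// With .X set, the comparison goes through the extended helper, which is expected to fold the
+// carry flag from a preceding instruction into the result (used for multi-word compares);
+// otherwise a plain integer comparison is emitted.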
+IR::U1 IsetCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
+ CompareOp compare_op, bool is_signed, bool x) {
+ return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed)
+ : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed);
+}
+
+void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ BitField<43, 1, u64> x;
+ BitField<44, 1, u64> bf;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> is_signed;
+ BitField<49, 3, CompareOp> compare_op;
+ } const iset{insn};
+
+ const IR::U32 src_a{v.X(iset.src_reg)};
+ const bool is_signed{iset.is_signed != 0};
+ const IR::U32 zero{v.ir.Imm32(0)};
+ const bool x{iset.x != 0};
+ const IR::U1 cmp_result{IsetCompare(v.ir, src_a, src_b, iset.compare_op, is_signed, x)};
+
+ IR::U1 pred{v.ir.GetPred(iset.pred)};
+ if (iset.neg_pred != 0) {
+ pred = v.ir.LogicalNot(pred);
+ }
+ const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, iset.bop)};
+
+ const IR::U32 one_mask{v.ir.Imm32(-1)};
+ const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
+ const IR::U32 pass_result{iset.bf == 0 ? one_mask : fp_one};
+ const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)};
+
+ v.X(iset.dest_reg, result);
+ if (iset.cc != 0) {
+ if (x) {
+ throw NotImplementedException("ISET.CC + X");
+ }
+ const IR::U1 is_zero{v.ir.IEqual(result, zero)};
+ v.SetZFlag(is_zero);
+ if (iset.bf != 0) {
+ v.ResetSFlag();
+ } else {
+ v.SetSFlag(v.ir.LogicalNot(is_zero));
+ }
+ v.ResetCFlag();
+ v.ResetOFlag();
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::ISET_reg(u64 insn) {
+ ISET(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::ISET_cbuf(u64 insn) {
+ ISET(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::ISET_imm(u64 insn) {
+ ISET(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
new file mode 100644
index 000000000..0b8119ddd
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
@@ -0,0 +1,182 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class FloatFormat : u64 {
+ F16 = 1,
+ F32 = 2,
+ F64 = 3,
+};
+
+enum class IntFormat : u64 {
+ U8 = 0,
+ U16 = 1,
+ U32 = 2,
+ U64 = 3,
+};
+
+union Encoding {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 2, FloatFormat> float_format;
+ BitField<10, 2, IntFormat> int_format;
+ BitField<13, 1, u64> is_signed;
+ BitField<39, 2, FpRounding> fp_rounding;
+ BitField<41, 2, u64> selector;
+    BitField<45, 1, u64> neg;
+    BitField<47, 1, u64> cc;
+ BitField<49, 1, u64> abs;
+};
+
+bool Is64(u64 insn) {
+ return Encoding{insn}.int_format == IntFormat::U64;
+}
+
+int BitSize(FloatFormat format) {
+ switch (format) {
+ case FloatFormat::F16:
+ return 16;
+ case FloatFormat::F32:
+ return 32;
+ case FloatFormat::F64:
+ return 64;
+ }
+ throw NotImplementedException("Invalid float format {}", format);
+}
+
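+// Branchless absolute value for sub-32-bit integers: (value + mask) ^ mask, with mask being the
+// sign bit replicated across the word. The most negative representable value is returned
+// unchanged, since it has no positive counterpart at that bit size.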
+IR::U32 SmallAbs(TranslatorVisitor& v, const IR::U32& value, int bitsize) {
+ const IR::U32 least_value{v.ir.Imm32(-(1 << (bitsize - 1)))};
+ const IR::U32 mask{v.ir.ShiftRightArithmetic(value, v.ir.Imm32(bitsize - 1))};
+ const IR::U32 absolute{v.ir.BitwiseXor(v.ir.IAdd(value, mask), mask)};
+ const IR::U1 is_least{v.ir.IEqual(value, least_value)};
+ return IR::U32{v.ir.Select(is_least, value, absolute)};
+}
+
+void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) {
+ const Encoding i2f{insn};
+ if (i2f.cc != 0) {
+ throw NotImplementedException("I2F CC");
+ }
+ const bool is_signed{i2f.is_signed != 0};
+ int src_bitsize{};
+ switch (i2f.int_format) {
+ case IntFormat::U8:
+ src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8),
+ v.ir.Imm32(8), is_signed);
+ if (i2f.abs != 0) {
+ src = SmallAbs(v, src, 8);
+ }
+ src_bitsize = 8;
+ break;
+ case IntFormat::U16:
+ if (i2f.selector == 1 || i2f.selector == 3) {
+ throw NotImplementedException("Invalid U16 selector {}", i2f.selector.Value());
+ }
+ src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8),
+ v.ir.Imm32(16), is_signed);
+ if (i2f.abs != 0) {
+ src = SmallAbs(v, src, 16);
+ }
+ src_bitsize = 16;
+ break;
+ case IntFormat::U32:
+ case IntFormat::U64:
+ if (i2f.selector != 0) {
+ throw NotImplementedException("Unexpected selector {}", i2f.selector.Value());
+ }
+ if (i2f.abs != 0 && is_signed) {
+ src = v.ir.IAbs(src);
+ }
+ src_bitsize = i2f.int_format == IntFormat::U64 ? 64 : 32;
+ break;
+ }
+ const int conversion_src_bitsize{i2f.int_format == IntFormat::U64 ? 64 : 32};
+ const int dst_bitsize{BitSize(i2f.float_format)};
+ const IR::FpControl fp_control{
+ .no_contraction = false,
+ .rounding = CastFpRounding(i2f.fp_rounding),
+ .fmz_mode = IR::FmzMode::DontCare,
+ };
+ auto value{v.ir.ConvertIToF(static_cast<size_t>(dst_bitsize),
+ static_cast<size_t>(conversion_src_bitsize), is_signed, src,
+ fp_control)};
+ if (i2f.neg != 0) {
+ if (i2f.abs != 0 || !is_signed) {
+ // We know the value is positive
+ value = v.ir.FPNeg(value);
+ } else {
+ // Only negate if the input isn't the lowest value
+ IR::U1 is_least;
+ if (src_bitsize == 64) {
+ is_least = v.ir.IEqual(src, v.ir.Imm64(std::numeric_limits<s64>::min()));
+ } else if (src_bitsize == 32) {
+ is_least = v.ir.IEqual(src, v.ir.Imm32(std::numeric_limits<s32>::min()));
+ } else {
+ const IR::U32 least_value{v.ir.Imm32(-(1 << (src_bitsize - 1)))};
+ is_least = v.ir.IEqual(src, least_value);
+ }
+ value = IR::F16F32F64{v.ir.Select(is_least, value, v.ir.FPNeg(value))};
+ }
+ }
+ switch (i2f.float_format) {
+ case FloatFormat::F16: {
+ const IR::F16 zero{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
+ v.X(i2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(value, zero)));
+ break;
+ }
+ case FloatFormat::F32:
+ v.F(i2f.dest_reg, value);
+ break;
+ case FloatFormat::F64: {
+ if (!IR::IsAligned(i2f.dest_reg, 2)) {
+ throw NotImplementedException("Unaligned destination {}", i2f.dest_reg.Value());
+ }
+ const IR::Value vector{v.ir.UnpackDouble2x32(value)};
+ for (int i = 0; i < 2; ++i) {
+ v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, static_cast<size_t>(i))});
+ }
+ break;
+ }
+ default:
+ throw NotImplementedException("Invalid float format {}", i2f.float_format.Value());
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::I2F_reg(u64 insn) {
+ if (Is64(insn)) {
+ union {
+ u64 raw;
+ BitField<20, 8, IR::Reg> reg;
+ } const value{insn};
+ const IR::Value regs{ir.CompositeConstruct(ir.GetReg(value.reg), ir.GetReg(value.reg + 1))};
+ I2F(*this, insn, ir.PackUint2x32(regs));
+ } else {
+ I2F(*this, insn, GetReg20(insn));
+ }
+}
+
+void TranslatorVisitor::I2F_cbuf(u64 insn) {
+ if (Is64(insn)) {
+ I2F(*this, insn, GetPackedCbuf(insn));
+ } else {
+ I2F(*this, insn, GetCbuf(insn));
+ }
+}
+
+void TranslatorVisitor::I2F_imm(u64 insn) {
+ if (Is64(insn)) {
+ I2F(*this, insn, GetPackedImm20(insn));
+ } else {
+ I2F(*this, insn, GetImm20(insn));
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp
new file mode 100644
index 000000000..5feefc0ce
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp
@@ -0,0 +1,82 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class MaxShift : u64 {
+ U32,
+ Undefined,
+ U64,
+ S64,
+};
+
+IR::U64 PackedShift(IR::IREmitter& ir, const IR::U64& packed_int, const IR::U32& safe_shift,
+ bool right_shift, bool is_signed) {
+ if (!right_shift) {
+ return ir.ShiftLeftLogical(packed_int, safe_shift);
+ }
+ if (is_signed) {
+ return ir.ShiftRightArithmetic(packed_int, safe_shift);
+ }
+ return ir.ShiftRightLogical(packed_int, safe_shift);
+}
+
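+// SHF is a funnel shift: the low word (read from the register that also acts as the destination)
+// and the high word are packed into a 64-bit value and shifted as a unit; right shifts keep the
+// lower result word and left shifts keep the upper one.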
+void SHF(TranslatorVisitor& v, u64 insn, const IR::U32& shift, const IR::U32& high_bits,
+ bool right_shift) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<0, 8, IR::Reg> lo_bits_reg;
+ BitField<37, 2, MaxShift> max_shift;
+ BitField<47, 1, u64> cc;
+ BitField<48, 2, u64> x_mode;
+ BitField<50, 1, u64> wrap;
+ } const shf{insn};
+
+ if (shf.cc != 0) {
+ throw NotImplementedException("SHF CC");
+ }
+ if (shf.x_mode != 0) {
+ throw NotImplementedException("SHF X Mode");
+ }
+ if (shf.max_shift == MaxShift::Undefined) {
+ throw NotImplementedException("SHF Use of undefined MaxShift value");
+ }
+ const IR::U32 low_bits{v.X(shf.lo_bits_reg)};
+ const IR::U64 packed_int{v.ir.PackUint2x32(v.ir.CompositeConstruct(low_bits, high_bits))};
+ const IR::U32 max_shift{shf.max_shift == MaxShift::U32 ? v.ir.Imm32(32) : v.ir.Imm32(63)};
+ const IR::U32 safe_shift{shf.wrap != 0
+ ? v.ir.BitwiseAnd(shift, v.ir.ISub(max_shift, v.ir.Imm32(1)))
+ : v.ir.UMin(shift, max_shift)};
+
+ const bool is_signed{shf.max_shift == MaxShift::S64};
+ const IR::U64 shifted_value{PackedShift(v.ir, packed_int, safe_shift, right_shift, is_signed)};
+ const IR::Value unpacked_value{v.ir.UnpackUint2x32(shifted_value)};
+
+ const IR::U32 result{v.ir.CompositeExtract(unpacked_value, right_shift ? 0 : 1)};
+ v.X(shf.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::SHF_l_reg(u64 insn) {
+ SHF(*this, insn, GetReg20(insn), GetReg39(insn), false);
+}
+
+void TranslatorVisitor::SHF_l_imm(u64 insn) {
+ SHF(*this, insn, GetImm20(insn), GetReg39(insn), false);
+}
+
+void TranslatorVisitor::SHF_r_reg(u64 insn) {
+ SHF(*this, insn, GetReg20(insn), GetReg39(insn), true);
+}
+
+void TranslatorVisitor::SHF_r_imm(u64 insn) {
+ SHF(*this, insn, GetImm20(insn), GetReg39(insn), true);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp
new file mode 100644
index 000000000..1badbacc4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp
@@ -0,0 +1,64 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void IMNMX(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ BitField<43, 2, u64> mode;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> is_signed;
+ } const imnmx{insn};
+
+ if (imnmx.cc != 0) {
+ throw NotImplementedException("IMNMX CC");
+ }
+
+ if (imnmx.mode != 0) {
+ throw NotImplementedException("IMNMX.MODE");
+ }
+
+ const IR::U1 pred{v.ir.GetPred(imnmx.pred)};
+ const IR::U32 op_a{v.X(imnmx.src_reg)};
+ IR::U32 min;
+ IR::U32 max;
+
+ if (imnmx.is_signed != 0) {
+ min = IR::U32{v.ir.SMin(op_a, op_b)};
+ max = IR::U32{v.ir.SMax(op_a, op_b)};
+ } else {
+ min = IR::U32{v.ir.UMin(op_a, op_b)};
+ max = IR::U32{v.ir.UMax(op_a, op_b)};
+ }
+ if (imnmx.neg_pred != 0) {
+ std::swap(min, max);
+ }
+
+ const IR::U32 result{v.ir.Select(pred, min, max)};
+ v.X(imnmx.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::IMNMX_reg(u64 insn) {
+ IMNMX(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::IMNMX_cbuf(u64 insn) {
+ IMNMX(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::IMNMX_imm(u64 insn) {
+ IMNMX(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp
new file mode 100644
index 000000000..5ece7678d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp
@@ -0,0 +1,36 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void POPC(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<40, 1, u64> tilde;
+ } const popc{insn};
+
+ const IR::U32 operand = popc.tilde == 0 ? src : v.ir.BitwiseNot(src);
+ const IR::U32 result = v.ir.BitCount(operand);
+ v.X(popc.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::POPC_reg(u64 insn) {
+ POPC(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::POPC_cbuf(u64 insn) {
+ POPC(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::POPC_imm(u64 insn) {
+ POPC(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
new file mode 100644
index 000000000..044671943
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
@@ -0,0 +1,86 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool cc, bool neg_a, bool neg_b,
+ u64 scale_imm) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> op_a;
+ } const iscadd{insn};
+
+ const bool po{neg_a && neg_b};
+ IR::U32 op_a{v.X(iscadd.op_a)};
+ if (po) {
+ // When PO is present, add one
+ op_b = v.ir.IAdd(op_b, v.ir.Imm32(1));
+ } else {
+ // When PO is not present, the bits are interpreted as negation
+ if (neg_a) {
+ op_a = v.ir.INeg(op_a);
+ }
+ if (neg_b) {
+ op_b = v.ir.INeg(op_b);
+ }
+ }
+ // With the operands already processed, scale A
+ const IR::U32 scale{v.ir.Imm32(static_cast<u32>(scale_imm))};
+ const IR::U32 scaled_a{v.ir.ShiftLeftLogical(op_a, scale)};
+
+ const IR::U32 result{v.ir.IAdd(scaled_a, op_b)};
+ v.X(iscadd.dest_reg, result);
+
+ if (cc) {
+ v.SetZFlag(v.ir.GetZeroFromOp(result));
+ v.SetSFlag(v.ir.GetSignFromOp(result));
+ const IR::U1 carry{v.ir.GetCarryFromOp(result)};
+ const IR::U1 overflow{v.ir.GetOverflowFromOp(result)};
+ v.SetCFlag(po ? v.ir.LogicalOr(carry, v.ir.GetCarryFromOp(op_b)) : carry);
+ v.SetOFlag(po ? v.ir.LogicalOr(overflow, v.ir.GetOverflowFromOp(op_b)) : overflow);
+ }
+}
+
+void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
+ union {
+ u64 raw;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> neg_b;
+ BitField<49, 1, u64> neg_a;
+ BitField<39, 5, u64> scale;
+ } const iscadd{insn};
+
+ ISCADD(v, insn, op_b, iscadd.cc != 0, iscadd.neg_a != 0, iscadd.neg_b != 0, iscadd.scale);
+}
+
+} // Anonymous namespace
+
+void TranslatorVisitor::ISCADD_reg(u64 insn) {
+ ISCADD(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::ISCADD_cbuf(u64 insn) {
+ ISCADD(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::ISCADD_imm(u64 insn) {
+ ISCADD(*this, insn, GetImm20(insn));
+}
+
+void TranslatorVisitor::ISCADD32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<52, 1, u64> cc;
+ BitField<53, 5, u64> scale;
+ } const iscadd{insn};
+
+    ISCADD(*this, insn, GetImm32(insn), iscadd.cc != 0, false, false, iscadd.scale);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp
new file mode 100644
index 000000000..bee10e5b9
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp
@@ -0,0 +1,58 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+IR::U1 IsetpCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
+ CompareOp compare_op, bool is_signed, bool x) {
+ return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed)
+ : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed);
+}
+
+void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
+ union {
+ u64 raw;
+ BitField<0, 3, IR::Pred> dest_pred_b;
+ BitField<3, 3, IR::Pred> dest_pred_a;
+ BitField<8, 8, IR::Reg> src_reg_a;
+ BitField<39, 3, IR::Pred> bop_pred;
+ BitField<42, 1, u64> neg_bop_pred;
+ BitField<43, 1, u64> x;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<48, 1, u64> is_signed;
+ BitField<49, 3, CompareOp> compare_op;
+ } const isetp{insn};
+
+ const bool is_signed{isetp.is_signed != 0};
+ const bool x{isetp.x != 0};
+ const BooleanOp bop{isetp.bop};
+ const CompareOp compare_op{isetp.compare_op};
+ const IR::U32 op_a{v.X(isetp.src_reg_a)};
+ const IR::U1 comparison{IsetpCompare(v.ir, op_a, op_b, compare_op, is_signed, x)};
+ const IR::U1 bop_pred{v.ir.GetPred(isetp.bop_pred, isetp.neg_bop_pred != 0)};
+ const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)};
+ const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)};
+ v.ir.SetPred(isetp.dest_pred_a, result_a);
+ v.ir.SetPred(isetp.dest_pred_b, result_b);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::ISETP_reg(u64 insn) {
+ ISETP(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::ISETP_cbuf(u64 insn) {
+ ISETP(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::ISETP_imm(u64 insn) {
+ ISETP(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
new file mode 100644
index 000000000..20af68852
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
@@ -0,0 +1,71 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg_a;
+ BitField<39, 1, u64> w;
+ BitField<43, 1, u64> x;
+ BitField<47, 1, u64> cc;
+ } const shl{insn};
+
+ if (shl.x != 0) {
+ throw NotImplementedException("SHL.X");
+ }
+ if (shl.cc != 0) {
+ throw NotImplementedException("SHL.CC");
+ }
+ const IR::U32 base{v.X(shl.src_reg_a)};
+ IR::U32 result;
+ if (shl.w != 0) {
+ // When .W is set, the shift value is wrapped
+ // To emulate this we just have to wrap it ourselves.
+ const IR::U32 shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))};
+ result = v.ir.ShiftLeftLogical(base, shift);
+ } else {
+ // When .W is not set, the shift value is clamped between 0 and 32.
+        // To emulate this we have to keep in mind the special case of a shift by 32, which
+        // evaluates to 0.
+        // We can safely evaluate an out-of-bounds shift according to the SPIR-V specification:
+ //
+ // https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpShiftLeftLogical
+ // "Shift is treated as unsigned. The resulting value is undefined if Shift is greater than
+ // or equal to the bit width of the components of Base."
+ //
+        // And per the GLASM specification it is also safe to evaluate out of bounds:
+ //
+ // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_gpu_program4.txt
+ // "The results of a shift operation ("<<") are undefined if the value of the second operand
+ // is negative, or greater than or equal to the number of bits in the first operand."
+ //
+ // Emphasis on undefined results in contrast to undefined behavior.
+ //
+ const IR::U1 is_safe{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)};
+ const IR::U32 unsafe_result{v.ir.ShiftLeftLogical(base, unsafe_shift)};
+ result = IR::U32{v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0))};
+ }
+ v.X(shl.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::SHL_reg(u64 insn) {
+ SHL(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::SHL_cbuf(u64 insn) {
+ SHL(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::SHL_imm(u64 insn) {
+ SHL(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp
new file mode 100644
index 000000000..be00bb605
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp
@@ -0,0 +1,66 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void SHR(TranslatorVisitor& v, u64 insn, const IR::U32& shift) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg_a;
+ BitField<39, 1, u64> is_wrapped;
+ BitField<40, 1, u64> brev;
+ BitField<43, 1, u64> xmode;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> is_signed;
+ } const shr{insn};
+
+ if (shr.xmode != 0) {
+ throw NotImplementedException("SHR.XMODE");
+ }
+ if (shr.cc != 0) {
+ throw NotImplementedException("SHR.CC");
+ }
+
+ IR::U32 base{v.X(shr.src_reg_a)};
+ if (shr.brev == 1) {
+ base = v.ir.BitReverse(base);
+ }
+ IR::U32 result;
+ const IR::U32 safe_shift = shr.is_wrapped == 0 ? shift : v.ir.BitwiseAnd(shift, v.ir.Imm32(31));
+ if (shr.is_signed == 1) {
+ result = IR::U32{v.ir.ShiftRightArithmetic(base, safe_shift)};
+ } else {
+ result = IR::U32{v.ir.ShiftRightLogical(base, safe_shift)};
+ }
+
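+    // Without .W, out-of-range shift amounts are clamped: logical shifts produce 0 and arithmetic
+    // shifts of negative values produce all ones.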
+ if (shr.is_wrapped == 0) {
+ const IR::U32 zero{v.ir.Imm32(0)};
+ const IR::U32 safe_bits{v.ir.Imm32(32)};
+
+ const IR::U1 is_negative{v.ir.ILessThan(result, zero, true)};
+ const IR::U1 is_safe{v.ir.ILessThan(shift, safe_bits, false)};
+ const IR::U32 clamped_value{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)};
+ result = IR::U32{v.ir.Select(is_safe, result, clamped_value)};
+ }
+ v.X(shr.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::SHR_reg(u64 insn) {
+ SHR(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::SHR_cbuf(u64 insn) {
+ SHR(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::SHR_imm(u64 insn) {
+ SHR(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
new file mode 100644
index 000000000..2932cdc42
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
@@ -0,0 +1,135 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class SelectMode : u64 {
+ Default,
+ CLO,
+ CHI,
+ CSFU,
+ CBCC,
+};
+
+enum class Half : u64 {
+ H0, // Least-significant bits (15:0)
+ H1, // Most-significant bits (31:16)
+};
+
+IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, Half half, bool is_signed) {
+ const IR::U32 offset{v.ir.Imm32(half == Half::H1 ? 16 : 0)};
+ return v.ir.BitFieldExtract(src, offset, v.ir.Imm32(16), is_signed);
+}
+
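+// XMAD multiplies two 16-bit halves of its sources and accumulates a third operand:
+// result = (half_a * half_b) [optionally << 16 with .PSL] + op_c, where .MRG then merges the low
+// half of src_b into the upper half of the result.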
+void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c,
+ SelectMode select_mode, Half half_b, bool psl, bool mrg, bool x) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg_a;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> is_a_signed;
+ BitField<49, 1, u64> is_b_signed;
+ BitField<53, 1, Half> half_a;
+ } const xmad{insn};
+
+ if (x) {
+ throw NotImplementedException("XMAD X");
+ }
+ const IR::U32 op_a{ExtractHalf(v, v.X(xmad.src_reg_a), xmad.half_a, xmad.is_a_signed != 0)};
+ const IR::U32 op_b{ExtractHalf(v, src_b, half_b, xmad.is_b_signed != 0)};
+
+ IR::U32 product{v.ir.IMul(op_a, op_b)};
+ if (psl) {
+        // .PSL shifts the product left by 16 bits
+ product = v.ir.ShiftLeftLogical(product, v.ir.Imm32(16));
+ }
+ const IR::U32 op_c{[&]() -> IR::U32 {
+ switch (select_mode) {
+ case SelectMode::Default:
+ return src_c;
+ case SelectMode::CLO:
+ return ExtractHalf(v, src_c, Half::H0, false);
+ case SelectMode::CHI:
+ return ExtractHalf(v, src_c, Half::H1, false);
+ case SelectMode::CBCC:
+ return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_c);
+ case SelectMode::CSFU:
+ throw NotImplementedException("XMAD CSFU");
+ }
+ throw NotImplementedException("Invalid XMAD select mode {}", select_mode);
+ }()};
+ IR::U32 result{v.ir.IAdd(product, op_c)};
+ if (mrg) {
+        // .MRG inserts bits [15:0] of src_b into bits [31:16] of the result.
+ const IR::U32 lsb_b{ExtractHalf(v, src_b, Half::H0, false)};
+ result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16));
+ }
+ if (xmad.cc) {
+ throw NotImplementedException("XMAD CC");
+ }
+ // Store result
+ v.X(xmad.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::XMAD_reg(u64 insn) {
+ union {
+ u64 raw;
+ BitField<35, 1, Half> half_b;
+ BitField<36, 1, u64> psl;
+ BitField<37, 1, u64> mrg;
+ BitField<38, 1, u64> x;
+ BitField<50, 3, SelectMode> select_mode;
+ } const xmad{insn};
+
+ XMAD(*this, insn, GetReg20(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0,
+ xmad.mrg != 0, xmad.x != 0);
+}
+
+void TranslatorVisitor::XMAD_rc(u64 insn) {
+ union {
+ u64 raw;
+ BitField<50, 2, SelectMode> select_mode;
+ BitField<52, 1, Half> half_b;
+ BitField<54, 1, u64> x;
+ } const xmad{insn};
+
+ XMAD(*this, insn, GetReg39(insn), GetCbuf(insn), xmad.select_mode, xmad.half_b, false, false,
+ xmad.x != 0);
+}
+
+void TranslatorVisitor::XMAD_cr(u64 insn) {
+ union {
+ u64 raw;
+ BitField<50, 2, SelectMode> select_mode;
+ BitField<52, 1, Half> half_b;
+ BitField<54, 1, u64> x;
+ BitField<55, 1, u64> psl;
+ BitField<56, 1, u64> mrg;
+ } const xmad{insn};
+
+ XMAD(*this, insn, GetCbuf(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0,
+ xmad.mrg != 0, xmad.x != 0);
+}
+
+void TranslatorVisitor::XMAD_imm(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 16, u64> src_b;
+ BitField<36, 1, u64> psl;
+ BitField<37, 1, u64> mrg;
+ BitField<38, 1, u64> x;
+ BitField<50, 3, SelectMode> select_mode;
+ } const xmad{insn};
+
+ XMAD(*this, insn, ir.Imm32(static_cast<u32>(xmad.src_b)), GetReg39(insn), xmad.select_mode,
+ Half::H0, xmad.psl != 0, xmad.mrg != 0, xmad.x != 0);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp
new file mode 100644
index 000000000..53e8d8923
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp
@@ -0,0 +1,126 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class IntegerWidth : u64 {
+ Byte,
+ Short,
+ Word,
+};
+
+[[nodiscard]] IR::U32 WidthSize(IR::IREmitter& ir, IntegerWidth width) {
+ switch (width) {
+ case IntegerWidth::Byte:
+ return ir.Imm32(8);
+ case IntegerWidth::Short:
+ return ir.Imm32(16);
+ case IntegerWidth::Word:
+ return ir.Imm32(32);
+ default:
+ throw NotImplementedException("Invalid width {}", width);
+ }
+}
+
+[[nodiscard]] IR::U32 ConvertInteger(IR::IREmitter& ir, const IR::U32& src,
+ IntegerWidth dst_width) {
+ const IR::U32 zero{ir.Imm32(0)};
+ const IR::U32 count{WidthSize(ir, dst_width)};
+ return ir.BitFieldExtract(src, zero, count, false);
+}
+
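+// Clamps the source to the destination's representable range (e.g. [-128, 127] for a signed byte
+// when the source is signed, [0, 255] for an unsigned byte). A signed source converted to an
+// unsigned destination is first clamped to zero.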
+[[nodiscard]] IR::U32 SaturateInteger(IR::IREmitter& ir, const IR::U32& src, IntegerWidth dst_width,
+ bool dst_signed, bool src_signed) {
+ IR::U32 min{};
+ IR::U32 max{};
+ const IR::U32 zero{ir.Imm32(0)};
+ switch (dst_width) {
+ case IntegerWidth::Byte:
+ min = dst_signed && src_signed ? ir.Imm32(0xffffff80) : zero;
+ max = dst_signed ? ir.Imm32(0x7f) : ir.Imm32(0xff);
+ break;
+ case IntegerWidth::Short:
+ min = dst_signed && src_signed ? ir.Imm32(0xffff8000) : zero;
+ max = dst_signed ? ir.Imm32(0x7fff) : ir.Imm32(0xffff);
+ break;
+ case IntegerWidth::Word:
+ min = dst_signed && src_signed ? ir.Imm32(0x80000000) : zero;
+ max = dst_signed ? ir.Imm32(0x7fffffff) : ir.Imm32(0xffffffff);
+ break;
+ default:
+ throw NotImplementedException("Invalid width {}", dst_width);
+ }
+ const IR::U32 value{!dst_signed && src_signed ? ir.SMax(zero, src) : src};
+ return dst_signed && src_signed ? ir.SClamp(value, min, max) : ir.UClamp(value, min, max);
+}
+
+void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 2, IntegerWidth> dst_fmt;
+ BitField<12, 1, u64> dst_fmt_sign;
+ BitField<10, 2, IntegerWidth> src_fmt;
+ BitField<13, 1, u64> src_fmt_sign;
+ BitField<41, 3, u64> selector;
+ BitField<45, 1, u64> neg;
+ BitField<47, 1, u64> cc;
+ BitField<49, 1, u64> abs;
+ BitField<50, 1, u64> sat;
+ } const i2i{insn};
+
+ if (i2i.src_fmt == IntegerWidth::Short && (i2i.selector == 1 || i2i.selector == 3)) {
+ throw NotImplementedException("16-bit source format incompatible with selector {}",
+                                      i2i.selector.Value());
+ }
+ if (i2i.src_fmt == IntegerWidth::Word && i2i.selector != 0) {
+ throw NotImplementedException("32-bit source format incompatible with selector {}",
+                                      i2i.selector.Value());
+ }
+
+ const s32 selector{static_cast<s32>(i2i.selector)};
+ const IR::U32 offset{v.ir.Imm32(selector * 8)};
+ const IR::U32 count{WidthSize(v.ir, i2i.src_fmt)};
+ const bool src_signed{i2i.src_fmt_sign != 0};
+ const bool dst_signed{i2i.dst_fmt_sign != 0};
+ const bool sat{i2i.sat != 0};
+
+ IR::U32 src_values{v.ir.BitFieldExtract(src_a, offset, count, src_signed)};
+ if (i2i.abs != 0) {
+ src_values = v.ir.IAbs(src_values);
+ }
+ if (i2i.neg != 0) {
+ src_values = v.ir.INeg(src_values);
+ }
+ const IR::U32 result{
+ sat ? SaturateInteger(v.ir, src_values, i2i.dst_fmt, dst_signed, src_signed)
+ : ConvertInteger(v.ir, src_values, i2i.dst_fmt)};
+
+ v.X(i2i.dest_reg, result);
+ if (i2i.cc != 0) {
+ v.SetZFlag(v.ir.GetZeroFromOp(result));
+ v.SetSFlag(v.ir.GetSignFromOp(result));
+ v.ResetCFlag();
+ v.ResetOFlag();
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::I2I_reg(u64 insn) {
+ I2I(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::I2I_cbuf(u64 insn) {
+ I2I(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::I2I_imm(u64 insn) {
+ I2I(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp
new file mode 100644
index 000000000..9b85f8059
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp
@@ -0,0 +1,53 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Mode : u64 {
+ Default,
+ Patch,
+ Prim,
+ Attr,
+};
+
+enum class Shift : u64 {
+ Default,
+ U16,
+ B32,
+};
+
+} // Anonymous namespace
+
+void TranslatorVisitor::ISBERD(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<31, 1, u64> skew;
+ BitField<32, 1, u64> o;
+ BitField<33, 2, Mode> mode;
+ BitField<47, 2, Shift> shift;
+ } const isberd{insn};
+
+ if (isberd.skew != 0) {
+ throw NotImplementedException("SKEW");
+ }
+ if (isberd.o != 0) {
+ throw NotImplementedException("O");
+ }
+ if (isberd.mode != Mode::Default) {
+ throw NotImplementedException("Mode {}", isberd.mode.Value());
+ }
+ if (isberd.shift != Shift::Default) {
+ throw NotImplementedException("Shift {}", isberd.shift.Value());
+ }
+ LOG_WARNING(Shader, "(STUBBED) called");
+ X(isberd.dest_reg, X(isberd.src_reg));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
new file mode 100644
index 000000000..2300088e3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
@@ -0,0 +1,62 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h"
+
+namespace Shader::Maxwell {
+using namespace LDC;
+namespace {
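+// Resolves the constant buffer slot: in the default mode the buffer index is the immediate index
+// and the offset is register + immediate. The indirect modes (IL, IS, ISL) are not implemented.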
+std::pair<IR::U32, IR::U32> Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index,
+ const IR::U32& reg, const IR::U32& imm) {
+ switch (mode) {
+ case Mode::Default:
+ return {imm_index, ir.IAdd(reg, imm)};
+ default:
+ break;
+ }
+ throw NotImplementedException("Mode {}", mode);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::LDC(u64 insn) {
+ const Encoding ldc{insn};
+ const IR::U32 imm_index{ir.Imm32(static_cast<u32>(ldc.index))};
+ const IR::U32 reg{X(ldc.src_reg)};
+ const IR::U32 imm{ir.Imm32(static_cast<s32>(ldc.offset))};
+ const auto [index, offset]{Slot(ir, ldc.mode, imm_index, reg, imm)};
+ switch (ldc.size) {
+ case Size::U8:
+ X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, false)});
+ break;
+ case Size::S8:
+ X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, true)});
+ break;
+ case Size::U16:
+ X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, false)});
+ break;
+ case Size::S16:
+ X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, true)});
+ break;
+ case Size::B32:
+ X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 32, false)});
+ break;
+ case Size::B64: {
+ if (!IR::IsAligned(ldc.dest_reg, 2)) {
+ throw NotImplementedException("Unaligned destination register");
+ }
+ const IR::Value vector{ir.GetCbuf(index, offset, 64, false)};
+ for (int i = 0; i < 2; ++i) {
+ X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
+ }
+ break;
+ }
+ default:
+ throw NotImplementedException("Invalid size {}", ldc.size.Value());
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h
new file mode 100644
index 000000000..3074ea0e3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h
@@ -0,0 +1,39 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/reg.h"
+
+namespace Shader::Maxwell::LDC {
+
+enum class Mode : u64 {
+ Default,
+ IL,
+ IS,
+ ISL,
+};
+
+enum class Size : u64 {
+ U8,
+ S8,
+ U16,
+ S16,
+ B32,
+ B64,
+};
+
+union Encoding {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<20, 16, s64> offset;
+ BitField<36, 5, u64> index;
+ BitField<44, 2, Mode> mode;
+ BitField<48, 3, Size> size;
+};
+
+} // namespace Shader::Maxwell::LDC
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp
new file mode 100644
index 000000000..4a0f04e47
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp
@@ -0,0 +1,108 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void LEA_hi(TranslatorVisitor& v, u64 insn, const IR::U32& base, IR::U32 offset_hi, u64 scale,
+ bool neg, bool x) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> offset_lo_reg;
+ BitField<47, 1, u64> cc;
+ BitField<48, 3, IR::Pred> pred;
+ } const lea{insn};
+
+ if (x) {
+ throw NotImplementedException("LEA.HI X");
+ }
+ if (lea.pred != IR::Pred::PT) {
+ throw NotImplementedException("LEA.HI Pred");
+ }
+ if (lea.cc != 0) {
+ throw NotImplementedException("LEA.HI CC");
+ }
+
+ const IR::U32 offset_lo{v.X(lea.offset_lo_reg)};
+ const IR::U64 packed_offset{v.ir.PackUint2x32(v.ir.CompositeConstruct(offset_lo, offset_hi))};
+ const IR::U64 offset{neg ? IR::U64{v.ir.INeg(packed_offset)} : packed_offset};
+
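+    // Shifting the packed 64-bit offset right by (32 - scale) leaves, in its low word, the upper
+    // 32 bits of (offset << scale); LEA.HI adds that to the base address.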
+ const s32 hi_scale{32 - static_cast<s32>(scale)};
+ const IR::U64 scaled_offset{v.ir.ShiftRightLogical(offset, v.ir.Imm32(hi_scale))};
+ const IR::U32 scaled_offset_w0{v.ir.CompositeExtract(v.ir.UnpackUint2x32(scaled_offset), 0)};
+
+ IR::U32 result{v.ir.IAdd(base, scaled_offset_w0)};
+ v.X(lea.dest_reg, result);
+}
+
+void LEA_lo(TranslatorVisitor& v, u64 insn, const IR::U32& base) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> offset_lo_reg;
+ BitField<39, 5, u64> scale;
+ BitField<45, 1, u64> neg;
+ BitField<46, 1, u64> x;
+ BitField<47, 1, u64> cc;
+ BitField<48, 3, IR::Pred> pred;
+ } const lea{insn};
+ if (lea.x != 0) {
+ throw NotImplementedException("LEA.LO X");
+ }
+ if (lea.pred != IR::Pred::PT) {
+ throw NotImplementedException("LEA.LO Pred");
+ }
+ if (lea.cc != 0) {
+ throw NotImplementedException("LEA.LO CC");
+ }
+
+ const IR::U32 offset_lo{v.X(lea.offset_lo_reg)};
+ const s32 scale{static_cast<s32>(lea.scale)};
+ const IR::U32 offset{lea.neg != 0 ? IR::U32{v.ir.INeg(offset_lo)} : offset_lo};
+ const IR::U32 scaled_offset{v.ir.ShiftLeftLogical(offset, v.ir.Imm32(scale))};
+
+ IR::U32 result{v.ir.IAdd(base, scaled_offset)};
+ v.X(lea.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::LEA_hi_reg(u64 insn) {
+ union {
+ u64 insn;
+ BitField<28, 5, u64> scale;
+ BitField<37, 1, u64> neg;
+ BitField<38, 1, u64> x;
+ } const lea{insn};
+
+ LEA_hi(*this, insn, GetReg20(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0);
+}
+
+void TranslatorVisitor::LEA_hi_cbuf(u64 insn) {
+ union {
+ u64 insn;
+ BitField<51, 5, u64> scale;
+ BitField<56, 1, u64> neg;
+ BitField<57, 1, u64> x;
+ } const lea{insn};
+
+ LEA_hi(*this, insn, GetCbuf(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0);
+}
+
+void TranslatorVisitor::LEA_lo_reg(u64 insn) {
+ LEA_lo(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::LEA_lo_cbuf(u64 insn) {
+ LEA_lo(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::LEA_lo_imm(u64 insn) {
+ LEA_lo(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
new file mode 100644
index 000000000..924fb7a40
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
@@ -0,0 +1,196 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Size : u64 {
+ B32,
+ B64,
+ B96,
+ B128,
+};
+
+enum class InterpolationMode : u64 {
+ Pass,
+ Multiply,
+ Constant,
+ Sc,
+};
+
+enum class SampleMode : u64 {
+ Default,
+ Centroid,
+ Offset,
+};
+
+u32 NumElements(Size size) {
+ switch (size) {
+ case Size::B32:
+ return 1;
+ case Size::B64:
+ return 2;
+ case Size::B96:
+ return 3;
+ case Size::B128:
+ return 4;
+ }
+ throw InvalidArgument("Invalid size {}", size);
+}
+
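+// Calls f once per element with the indexed attribute offset for that element
+// (index register value plus element * 4 bytes).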
+template <typename F>
+void HandleIndexed(TranslatorVisitor& v, IR::Reg index_reg, u32 num_elements, F&& f) {
+ const IR::U32 index_value{v.X(index_reg)};
+ for (u32 element = 0; element < num_elements; ++element) {
+ const IR::U32 final_offset{
+ element == 0 ? index_value : IR::U32{v.ir.IAdd(index_value, v.ir.Imm32(element * 4U))}};
+ f(element, final_offset);
+ }
+}
+
+} // Anonymous namespace
+
+void TranslatorVisitor::ALD(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> index_reg;
+ BitField<20, 10, u64> absolute_offset;
+ BitField<20, 11, s64> relative_offset;
+ BitField<39, 8, IR::Reg> vertex_reg;
+ BitField<32, 1, u64> o;
+ BitField<31, 1, u64> patch;
+ BitField<47, 2, Size> size;
+ } const ald{insn};
+
+ const u64 offset{ald.absolute_offset.Value()};
+ if (offset % 4 != 0) {
+ throw NotImplementedException("Unaligned absolute offset {}", offset);
+ }
+ const IR::U32 vertex{X(ald.vertex_reg)};
+ const u32 num_elements{NumElements(ald.size)};
+ if (ald.index_reg == IR::Reg::RZ) {
+ for (u32 element = 0; element < num_elements; ++element) {
+ if (ald.patch != 0) {
+ const IR::Patch patch{offset / 4 + element};
+ F(ald.dest_reg + static_cast<int>(element), ir.GetPatch(patch));
+ } else {
+ const IR::Attribute attr{offset / 4 + element};
+ F(ald.dest_reg + static_cast<int>(element), ir.GetAttribute(attr, vertex));
+ }
+ }
+ return;
+ }
+ if (ald.patch != 0) {
+ throw NotImplementedException("Indirect patch read");
+ }
+ HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) {
+ F(ald.dest_reg + static_cast<int>(element), ir.GetAttributeIndexed(final_offset, vertex));
+ });
+}
+
+void TranslatorVisitor::AST(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> src_reg;
+ BitField<8, 8, IR::Reg> index_reg;
+ BitField<20, 10, u64> absolute_offset;
+ BitField<20, 11, s64> relative_offset;
+ BitField<31, 1, u64> patch;
+ BitField<39, 8, IR::Reg> vertex_reg;
+ BitField<47, 2, Size> size;
+ } const ast{insn};
+
+ const u64 offset{ast.absolute_offset.Value()};
+ if (offset % 4 != 0) {
+ throw NotImplementedException("Unaligned absolute offset {}", offset);
+ }
+ const IR::U32 vertex{X(ast.vertex_reg)};
+ const u32 num_elements{NumElements(ast.size)};
+ if (ast.index_reg == IR::Reg::RZ) {
+ for (u32 element = 0; element < num_elements; ++element) {
+ if (ast.patch != 0) {
+ const IR::Patch patch{offset / 4 + element};
+ ir.SetPatch(patch, F(ast.src_reg + static_cast<int>(element)));
+ } else {
+ const IR::Attribute attr{offset / 4 + element};
+ ir.SetAttribute(attr, F(ast.src_reg + static_cast<int>(element)), vertex);
+ }
+ }
+ return;
+ }
+ if (ast.patch != 0) {
+ throw NotImplementedException("Indexed tessellation patch store");
+ }
+ HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) {
+ ir.SetAttributeIndexed(final_offset, F(ast.src_reg + static_cast<int>(element)), vertex);
+ });
+}
+
+void TranslatorVisitor::IPA(u64 insn) {
+ // IPA is the instruction used to read varyings from a fragment shader.
+ // gl_FragCoord is mapped to the gl_Position attribute.
+ // It yields unknown results when used outside of the fragment shader stage.
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> index_reg;
+ BitField<20, 8, IR::Reg> multiplier;
+ BitField<30, 8, IR::Attribute> attribute;
+ BitField<38, 1, u64> idx;
+ BitField<51, 1, u64> sat;
+ BitField<52, 2, SampleMode> sample_mode;
+ BitField<54, 2, InterpolationMode> interpolation_mode;
+ } const ipa{insn};
+
+ // Indexed IPAs are used for indexed varyings.
+ // For example:
+ //
+ // in vec4 colors[4];
+ // uniform int idx;
+ // void main() {
+ // gl_FragColor = colors[idx];
+ // }
+ const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ};
+ const IR::Attribute attribute{ipa.attribute};
+ IR::F32 value{is_indexed ? ir.GetAttributeIndexed(X(ipa.index_reg))
+ : ir.GetAttribute(attribute)};
+ if (IR::IsGeneric(attribute)) {
+ const ProgramHeader& sph{env.SPH()};
+ const u32 attr_index{IR::GenericAttributeIndex(attribute)};
+ const u32 element{static_cast<u32>(attribute) % 4};
+ const std::array input_map{sph.ps.GenericInputMap(attr_index)};
+ const bool is_perspective{input_map[element] == Shader::PixelImap::Perspective};
+ if (is_perspective) {
+ const IR::F32 position_w{ir.GetAttribute(IR::Attribute::PositionW)};
+ value = ir.FPMul(value, position_w);
+ }
+ }
+ if (ipa.interpolation_mode == InterpolationMode::Multiply) {
+ value = ir.FPMul(value, F(ipa.multiplier));
+ }
+
+    // Saturated IPAs are generally generated from clamped varyings.
+ // For example: clamp(some_varying, 0.0, 1.0)
+ const bool is_saturated{ipa.sat != 0};
+ if (is_saturated) {
+ if (attribute == IR::Attribute::FrontFace) {
+ throw NotImplementedException("IPA.SAT on FrontFace");
+ }
+ value = ir.FPSaturate(value);
+ }
+
+ F(ipa.dest_reg, value);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
new file mode 100644
index 000000000..d2a1dbf61
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
@@ -0,0 +1,218 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Size : u64 {
+ U8,
+ S8,
+ U16,
+ S16,
+ B32,
+ B64,
+ B128,
+};
+
+IR::U32 Offset(TranslatorVisitor& v, u64 insn) {
+ union {
+ u64 raw;
+ BitField<8, 8, IR::Reg> offset_reg;
+ BitField<20, 24, u64> absolute_offset;
+ BitField<20, 24, s64> relative_offset;
+ } const encoding{insn};
+
+ if (encoding.offset_reg == IR::Reg::RZ) {
+ return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset));
+ } else {
+ const s32 relative{static_cast<s32>(encoding.relative_offset.Value())};
+ return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
+ }
+}
+
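+// Returns the 32-bit word index used by LoadLocal/WriteLocal together with the original byte
+// offset.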
+std::pair<IR::U32, IR::U32> WordOffset(TranslatorVisitor& v, u64 insn) {
+ const IR::U32 offset{Offset(v, insn)};
+ if (offset.IsImmediate()) {
+ return {v.ir.Imm32(offset.U32() / 4), offset};
+ } else {
+ return {v.ir.ShiftRightArithmetic(offset, v.ir.Imm32(2)), offset};
+ }
+}
+
+std::pair<int, bool> GetSize(u64 insn) {
+ union {
+ u64 raw;
+ BitField<48, 3, Size> size;
+ } const encoding{insn};
+
+ switch (encoding.size) {
+ case Size::U8:
+ return {8, false};
+ case Size::S8:
+ return {8, true};
+ case Size::U16:
+ return {16, false};
+ case Size::S16:
+ return {16, true};
+ case Size::B32:
+ return {32, false};
+ case Size::B64:
+ return {64, false};
+ case Size::B128:
+ return {128, false};
+ default:
+ throw NotImplementedException("Invalid size {}", encoding.size.Value());
+ }
+}
+
+IR::Reg Reg(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> reg;
+ } const encoding{insn};
+
+ return encoding.reg;
+}
+
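+// Bit position of the addressed byte within its 32-bit word ((offset * 8) & 24); ShortOffset is
+// the 16-bit equivalent, yielding either 0 or 16.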
+IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) {
+ return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(24));
+}
+
+IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) {
+ return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(16));
+}
+
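+// Reads outside the declared local memory size return zero instead of an undefined value.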
+IR::U32 LoadLocal(TranslatorVisitor& v, const IR::U32& word_offset, const IR::U32& offset) {
+ const IR::U32 local_memory_size{v.ir.Imm32(v.env.LocalMemorySize())};
+ const IR::U1 in_bounds{v.ir.ILessThan(offset, local_memory_size, false)};
+ return IR::U32{v.ir.Select(in_bounds, v.ir.LoadLocal(word_offset), v.ir.Imm32(0))};
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::LDL(u64 insn) {
+ const auto [word_offset, offset]{WordOffset(*this, insn)};
+ const IR::U32 word{LoadLocal(*this, word_offset, offset)};
+ const IR::Reg dest{Reg(insn)};
+ const auto [bit_size, is_signed]{GetSize(insn)};
+ switch (bit_size) {
+ case 8: {
+ const IR::U32 bit{ByteOffset(ir, offset)};
+ X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(8), is_signed));
+ break;
+ }
+ case 16: {
+ const IR::U32 bit{ShortOffset(ir, offset)};
+ X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(16), is_signed));
+ break;
+ }
+ case 32:
+ case 64:
+ case 128:
+ if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) {
+ throw NotImplementedException("Unaligned destination register {}", dest);
+ }
+ X(dest, word);
+ for (int i = 1; i < bit_size / 32; ++i) {
+ const IR::U32 sub_word_offset{ir.IAdd(word_offset, ir.Imm32(i))};
+ const IR::U32 sub_offset{ir.IAdd(offset, ir.Imm32(i * 4))};
+ X(dest + i, LoadLocal(*this, sub_word_offset, sub_offset));
+ }
+ break;
+ }
+}
+
+void TranslatorVisitor::LDS(u64 insn) {
+ const IR::U32 offset{Offset(*this, insn)};
+ const IR::Reg dest{Reg(insn)};
+ const auto [bit_size, is_signed]{GetSize(insn)};
+ const IR::Value value{ir.LoadShared(bit_size, is_signed, offset)};
+ switch (bit_size) {
+ case 8:
+ case 16:
+ case 32:
+ X(dest, IR::U32{value});
+ break;
+ case 64:
+ case 128:
+ if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) {
+ throw NotImplementedException("Unaligned destination register {}", dest);
+ }
+ for (int element = 0; element < bit_size / 32; ++element) {
+ X(dest + element, IR::U32{ir.CompositeExtract(value, static_cast<size_t>(element))});
+ }
+ break;
+ }
+}
+
+void TranslatorVisitor::STL(u64 insn) {
+ const auto [word_offset, offset]{WordOffset(*this, insn)};
+ if (offset.IsImmediate()) {
+ // TODO: Support storing out of bounds at runtime
+ if (offset.U32() >= env.LocalMemorySize()) {
+ LOG_WARNING(Shader, "Storing local memory at 0x{:x} with a size of 0x{:x}, dropping",
+ offset.U32(), env.LocalMemorySize());
+ return;
+ }
+ }
+ const IR::Reg reg{Reg(insn)};
+ const IR::U32 src{X(reg)};
+ const int bit_size{GetSize(insn).first};
+ switch (bit_size) {
+ case 8: {
+ const IR::U32 bit{ByteOffset(ir, offset)};
+ const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(8))};
+ ir.WriteLocal(word_offset, value);
+ break;
+ }
+ case 16: {
+ const IR::U32 bit{ShortOffset(ir, offset)};
+ const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(16))};
+ ir.WriteLocal(word_offset, value);
+ break;
+ }
+ case 32:
+ case 64:
+ case 128:
+ if (!IR::IsAligned(reg, static_cast<size_t>(bit_size / 32))) {
+ throw NotImplementedException("Unaligned source register");
+ }
+ ir.WriteLocal(word_offset, src);
+ for (int i = 1; i < bit_size / 32; ++i) {
+ ir.WriteLocal(ir.IAdd(word_offset, ir.Imm32(i)), X(reg + i));
+ }
+ break;
+ }
+}
+
+void TranslatorVisitor::STS(u64 insn) {
+ const IR::U32 offset{Offset(*this, insn)};
+ const IR::Reg reg{Reg(insn)};
+ const int bit_size{GetSize(insn).first};
+ switch (bit_size) {
+ case 8:
+ case 16:
+ case 32:
+ ir.WriteShared(bit_size, offset, X(reg));
+ break;
+ case 64:
+ if (!IR::IsAligned(reg, 2)) {
+ throw NotImplementedException("Unaligned source register {}", reg);
+ }
+ ir.WriteShared(64, offset, ir.CompositeConstruct(X(reg), X(reg + 1)));
+ break;
+ case 128: {
+ if (!IR::IsAligned(reg, 2)) {
+ throw NotImplementedException("Unaligned source register {}", reg);
+ }
+ const IR::Value vector{ir.CompositeConstruct(X(reg), X(reg + 1), X(reg + 2), X(reg + 3))};
+ ir.WriteShared(128, offset, vector);
+ break;
+ }
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
new file mode 100644
index 000000000..36c5cff2f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
@@ -0,0 +1,184 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class LoadSize : u64 {
+ U8, // Zero-extend
+ S8, // Sign-extend
+ U16, // Zero-extend
+ S16, // Sign-extend
+ B32,
+ B64,
+ B128,
+ U128, // ???
+};
+
+enum class StoreSize : u64 {
+ U8, // Zero-extend
+ S8, // Sign-extend
+ U16, // Zero-extend
+ S16, // Sign-extend
+ B32,
+ B64,
+ B128,
+};
+
+// See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
+enum class LoadCache : u64 {
+ CA, // Cache at all levels, likely to be accessed again
+ CG, // Cache at global level (cache in L2 and below, not L1)
+ CI, // ???
+ CV, // Don't cache and fetch again (consider cached system memory lines stale, fetch again)
+};
+
+// See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
+enum class StoreCache : u64 {
+ WB, // Cache write-back all coherent levels
+ CG, // Cache at global level
+ CS, // Cache streaming, likely to be accessed once
+ WT, // Cache write-through (to system memory)
+};
+
+IR::U64 Address(TranslatorVisitor& v, u64 insn) {
+ union {
+ u64 raw;
+ BitField<8, 8, IR::Reg> addr_reg;
+ BitField<20, 24, s64> addr_offset;
+ BitField<20, 24, u64> rz_addr_offset;
+ BitField<45, 1, u64> e;
+ } const mem{insn};
+
+ const IR::U64 address{[&]() -> IR::U64 {
+ if (mem.e == 0) {
+ // LDG/STG without .E uses a 32-bit pointer, zero-extend it
+ return v.ir.UConvert(64, v.X(mem.addr_reg));
+ }
+ if (!IR::IsAligned(mem.addr_reg, 2)) {
+ throw NotImplementedException("Unaligned address register");
+ }
+ // Pack two registers to build the 64-bit address
+ return v.ir.PackUint2x32(v.ir.CompositeConstruct(v.X(mem.addr_reg), v.X(mem.addr_reg + 1)));
+ }()};
+ const u64 addr_offset{[&]() -> u64 {
+ if (mem.addr_reg == IR::Reg::RZ) {
+            // When the base register is RZ, the offset is an absolute address
+ return static_cast<u64>(mem.rz_addr_offset.Value());
+ } else {
+ return static_cast<u64>(mem.addr_offset.Value());
+ }
+ }()};
+ // Apply the offset
+ return v.ir.IAdd(address, v.ir.Imm64(addr_offset));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::LDG(u64 insn) {
+ // LDG loads global memory into registers
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<46, 2, LoadCache> cache;
+ BitField<48, 3, LoadSize> size;
+ } const ldg{insn};
+
+ // Pointer to load data from
+ const IR::U64 address{Address(*this, insn)};
+ const IR::Reg dest_reg{ldg.dest_reg};
+ switch (ldg.size) {
+ case LoadSize::U8:
+ X(dest_reg, ir.LoadGlobalU8(address));
+ break;
+ case LoadSize::S8:
+ X(dest_reg, ir.LoadGlobalS8(address));
+ break;
+ case LoadSize::U16:
+ X(dest_reg, ir.LoadGlobalU16(address));
+ break;
+ case LoadSize::S16:
+ X(dest_reg, ir.LoadGlobalS16(address));
+ break;
+ case LoadSize::B32:
+ X(dest_reg, ir.LoadGlobal32(address));
+ break;
+ case LoadSize::B64: {
+ if (!IR::IsAligned(dest_reg, 2)) {
+ throw NotImplementedException("Unaligned data registers");
+ }
+ const IR::Value vector{ir.LoadGlobal64(address)};
+ for (int i = 0; i < 2; ++i) {
+ X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
+ }
+ break;
+ }
+ case LoadSize::B128:
+ case LoadSize::U128: {
+ if (!IR::IsAligned(dest_reg, 4)) {
+ throw NotImplementedException("Unaligned data registers");
+ }
+ const IR::Value vector{ir.LoadGlobal128(address)};
+ for (int i = 0; i < 4; ++i) {
+ X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
+ }
+ break;
+ }
+ default:
+ throw NotImplementedException("Invalid LDG size {}", ldg.size.Value());
+ }
+}
+
+void TranslatorVisitor::STG(u64 insn) {
+ // STG stores registers into global memory.
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> data_reg;
+ BitField<46, 2, StoreCache> cache;
+ BitField<48, 3, StoreSize> size;
+ } const stg{insn};
+
+ // Pointer to store data into
+ const IR::U64 address{Address(*this, insn)};
+ const IR::Reg data_reg{stg.data_reg};
+ switch (stg.size) {
+ case StoreSize::U8:
+ ir.WriteGlobalU8(address, X(data_reg));
+ break;
+ case StoreSize::S8:
+ ir.WriteGlobalS8(address, X(data_reg));
+ break;
+ case StoreSize::U16:
+ ir.WriteGlobalU16(address, X(data_reg));
+ break;
+ case StoreSize::S16:
+ ir.WriteGlobalS16(address, X(data_reg));
+ break;
+ case StoreSize::B32:
+ ir.WriteGlobal32(address, X(data_reg));
+ break;
+ case StoreSize::B64: {
+ if (!IR::IsAligned(data_reg, 2)) {
+ throw NotImplementedException("Unaligned data registers");
+ }
+ const IR::Value vector{ir.CompositeConstruct(X(data_reg), X(data_reg + 1))};
+ ir.WriteGlobal64(address, vector);
+ break;
+ }
+ case StoreSize::B128:
+ if (!IR::IsAligned(data_reg, 4)) {
+ throw NotImplementedException("Unaligned data registers");
+ }
+ const IR::Value vector{
+ ir.CompositeConstruct(X(data_reg), X(data_reg + 1), X(data_reg + 2), X(data_reg + 3))};
+ ir.WriteGlobal128(address, vector);
+ break;
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp
new file mode 100644
index 000000000..92cd27ed4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp
@@ -0,0 +1,116 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class LogicalOp : u64 {
+ AND,
+ OR,
+ XOR,
+ PASS_B,
+};
+
+[[nodiscard]] IR::U32 LogicalOperation(IR::IREmitter& ir, const IR::U32& operand_1,
+ const IR::U32& operand_2, LogicalOp op) {
+ switch (op) {
+ case LogicalOp::AND:
+ return ir.BitwiseAnd(operand_1, operand_2);
+ case LogicalOp::OR:
+ return ir.BitwiseOr(operand_1, operand_2);
+ case LogicalOp::XOR:
+ return ir.BitwiseXor(operand_1, operand_2);
+ case LogicalOp::PASS_B:
+ return operand_2;
+ default:
+ throw NotImplementedException("Invalid Logical operation {}", op);
+ }
+}
+
+void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool x, bool cc, bool inv_a, bool inv_b,
+ LogicalOp bit_op, std::optional<PredicateOp> pred_op = std::nullopt,
+ IR::Pred dest_pred = IR::Pred::PT) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ } const lop{insn};
+
+ if (x) {
+ throw NotImplementedException("X");
+ }
+ IR::U32 op_a{v.X(lop.src_reg)};
+ if (inv_a != 0) {
+ op_a = v.ir.BitwiseNot(op_a);
+ }
+ if (inv_b != 0) {
+ op_b = v.ir.BitwiseNot(op_b);
+ }
+
+ const IR::U32 result{LogicalOperation(v.ir, op_a, op_b, bit_op)};
+ if (pred_op) {
+ const IR::U1 pred_result{PredicateOperation(v.ir, result, *pred_op)};
+ v.ir.SetPred(dest_pred, pred_result);
+ }
+ if (cc) {
+ if (bit_op == LogicalOp::PASS_B) {
+ v.SetZFlag(v.ir.IEqual(result, v.ir.Imm32(0)));
+ v.SetSFlag(v.ir.ILessThan(result, v.ir.Imm32(0), true));
+ } else {
+ v.SetZFlag(v.ir.GetZeroFromOp(result));
+ v.SetSFlag(v.ir.GetSignFromOp(result));
+ }
+ v.ResetCFlag();
+ v.ResetOFlag();
+ }
+ v.X(lop.dest_reg, result);
+}
+
+void LOP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
+ union {
+ u64 insn;
+ BitField<39, 1, u64> inv_a;
+ BitField<40, 1, u64> inv_b;
+ BitField<41, 2, LogicalOp> bit_op;
+ BitField<43, 1, u64> x;
+ BitField<44, 2, PredicateOp> pred_op;
+ BitField<47, 1, u64> cc;
+ BitField<48, 3, IR::Pred> dest_pred;
+ } const lop{insn};
+
+ LOP(v, insn, op_b, lop.x != 0, lop.cc != 0, lop.inv_a != 0, lop.inv_b != 0, lop.bit_op,
+ lop.pred_op, lop.dest_pred);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::LOP_reg(u64 insn) {
+ LOP(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::LOP_cbuf(u64 insn) {
+ LOP(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::LOP_imm(u64 insn) {
+ LOP(*this, insn, GetImm20(insn));
+}
+
+void TranslatorVisitor::LOP32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<53, 2, LogicalOp> bit_op;
+ BitField<57, 1, u64> x;
+ BitField<52, 1, u64> cc;
+ BitField<55, 1, u64> inv_a;
+ BitField<56, 1, u64> inv_b;
+ } const lop32i{insn};
+
+ LOP(*this, insn, GetImm32(insn), lop32i.x != 0, lop32i.cc != 0, lop32i.inv_a != 0,
+ lop32i.inv_b != 0, lop32i.bit_op);
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
new file mode 100644
index 000000000..e0fe47912
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
@@ -0,0 +1,122 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+// https://forums.developer.nvidia.com/t/reverse-lut-for-lop3-lut/110651
+// Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table)
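+// Bit n of the truth table is the result for the input combination where a is bit 2, b is
+// bit 1 and c is bit 0 of n; the result ORs together every minterm whose table bit is set.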
+IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c,
+ u64 ttbl) {
+ IR::U32 r{ir.Imm32(0)};
+ const IR::U32 not_a{ir.BitwiseNot(a)};
+ const IR::U32 not_b{ir.BitwiseNot(b)};
+ const IR::U32 not_c{ir.BitwiseNot(c)};
+ if (ttbl & 0x01) {
+ // r |= ~a & ~b & ~c;
+ const auto lhs{ir.BitwiseAnd(not_a, not_b)};
+ const auto rhs{ir.BitwiseAnd(lhs, not_c)};
+ r = ir.BitwiseOr(r, rhs);
+ }
+ if (ttbl & 0x02) {
+ // r |= ~a & ~b & c;
+ const auto lhs{ir.BitwiseAnd(not_a, not_b)};
+ const auto rhs{ir.BitwiseAnd(lhs, c)};
+ r = ir.BitwiseOr(r, rhs);
+ }
+ if (ttbl & 0x04) {
+ // r |= ~a & b & ~c;
+ const auto lhs{ir.BitwiseAnd(not_a, b)};
+ const auto rhs{ir.BitwiseAnd(lhs, not_c)};
+ r = ir.BitwiseOr(r, rhs);
+ }
+ if (ttbl & 0x08) {
+ // r |= ~a & b & c;
+ const auto lhs{ir.BitwiseAnd(not_a, b)};
+ const auto rhs{ir.BitwiseAnd(lhs, c)};
+ r = ir.BitwiseOr(r, rhs);
+ }
+ if (ttbl & 0x10) {
+ // r |= a & ~b & ~c;
+ const auto lhs{ir.BitwiseAnd(a, not_b)};
+ const auto rhs{ir.BitwiseAnd(lhs, not_c)};
+ r = ir.BitwiseOr(r, rhs);
+ }
+ if (ttbl & 0x20) {
+ // r |= a & ~b & c;
+ const auto lhs{ir.BitwiseAnd(a, not_b)};
+ const auto rhs{ir.BitwiseAnd(lhs, c)};
+ r = ir.BitwiseOr(r, rhs);
+ }
+ if (ttbl & 0x40) {
+ // r |= a & b & ~c;
+ const auto lhs{ir.BitwiseAnd(a, b)};
+ const auto rhs{ir.BitwiseAnd(lhs, not_c)};
+ r = ir.BitwiseOr(r, rhs);
+ }
+ if (ttbl & 0x80) {
+ // r |= a & b & c;
+ const auto lhs{ir.BitwiseAnd(a, b)};
+ const auto rhs{ir.BitwiseAnd(lhs, c)};
+ r = ir.BitwiseOr(r, rhs);
+ }
+ return r;
+}
+
+IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<47, 1, u64> cc;
+ } const lop3{insn};
+
+ if (lop3.cc != 0) {
+ throw NotImplementedException("LOP3 CC");
+ }
+
+ const IR::U32 op_a{v.X(lop3.src_reg)};
+ const IR::U32 result{ApplyLUT(v.ir, op_a, op_b, op_c, lut)};
+ v.X(lop3.dest_reg, result);
+ return result;
+}
+
+u64 GetLut48(u64 insn) {
+ union {
+ u64 raw;
+ BitField<48, 8, u64> lut;
+ } const lut{insn};
+ return lut.lut;
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::LOP3_reg(u64 insn) {
+ union {
+ u64 insn;
+ BitField<28, 8, u64> lut;
+ BitField<38, 1, u64> x;
+ BitField<36, 2, PredicateOp> pred_op;
+ BitField<48, 3, IR::Pred> pred;
+ } const lop3{insn};
+
+ if (lop3.x != 0) {
+ throw NotImplementedException("LOP3 X");
+ }
+ const IR::U32 result{LOP3(*this, insn, GetReg20(insn), GetReg39(insn), lop3.lut)};
+ const IR::U1 pred_result{PredicateOperation(ir, result, lop3.pred_op)};
+ ir.SetPred(lop3.pred, pred_result);
+}
+
+void TranslatorVisitor::LOP3_cbuf(u64 insn) {
+ LOP3(*this, insn, GetCbuf(insn), GetReg39(insn), GetLut48(insn));
+}
+
+void TranslatorVisitor::LOP3_imm(u64 insn) {
+ LOP3(*this, insn, GetImm20(insn), GetReg39(insn), GetLut48(insn));
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp
new file mode 100644
index 000000000..4324fd443
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp
@@ -0,0 +1,66 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Mode : u64 {
+ PR,
+ CC,
+};
+} // Anonymous namespace
+
+void TranslatorVisitor::P2R_reg(u64) {
+ throw NotImplementedException("P2R (reg)");
+}
+
+void TranslatorVisitor::P2R_cbuf(u64) {
+ throw NotImplementedException("P2R (cbuf)");
+}
+
+void TranslatorVisitor::P2R_imm(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src;
+ BitField<40, 1, Mode> mode;
+ BitField<41, 2, u64> byte_selector;
+ } const p2r{insn};
+
+ const u32 mask{GetImm20(insn).U32()};
+ const bool pr_mode{p2r.mode == Mode::PR};
+ const u32 num_items{pr_mode ? 7U : 4U};
+ const u32 offset{static_cast<u32>(p2r.byte_selector) * 8};
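+    // Gather the selected predicate (PR mode) or condition-code (CC mode) bits into the byte
+    // chosen by byte_selector, then merge them over the corresponding bits of the source.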
+ IR::U32 insert{ir.Imm32(0)};
+ for (u32 index = 0; index < num_items; ++index) {
+ if (((mask >> index) & 1) == 0) {
+ continue;
+ }
+ const IR::U1 cond{[this, index, pr_mode] {
+ if (pr_mode) {
+ return ir.GetPred(IR::Pred{index});
+ }
+ switch (index) {
+ case 0:
+ return ir.GetZFlag();
+ case 1:
+ return ir.GetSFlag();
+ case 2:
+ return ir.GetCFlag();
+ case 3:
+ return ir.GetOFlag();
+ }
+ throw LogicError("Unreachable P2R index");
+ }()};
+ const IR::U32 bit{ir.Select(cond, ir.Imm32(1U << (index + offset)), ir.Imm32(0))};
+ insert = ir.BitwiseOr(insert, bit);
+ }
+ const IR::U32 masked_out{ir.BitwiseAnd(X(p2r.src), ir.Imm32(~(mask << offset)))};
+ X(p2r.dest_reg, ir.BitwiseOr(masked_out, insert));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
new file mode 100644
index 000000000..6bb08db8a
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
@@ -0,0 +1,44 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = false) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<39, 4, u64> mask;
+ BitField<12, 4, u64> mov32i_mask;
+ } const mov{insn};
+
+ if ((is_mov32i ? mov.mov32i_mask : mov.mask) != 0xf) {
+ throw NotImplementedException("Non-full move mask");
+ }
+ v.X(mov.dest_reg, src);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::MOV_reg(u64 insn) {
+ MOV(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::MOV_cbuf(u64 insn) {
+ MOV(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::MOV_imm(u64 insn) {
+ MOV(*this, insn, GetImm20(insn));
+}
+
+void TranslatorVisitor::MOV32I(u64 insn) {
+ MOV(*this, insn, GetImm32(insn), true);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp
new file mode 100644
index 000000000..eda5f177b
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp
@@ -0,0 +1,71 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Mode : u64 {
+ PR,
+ CC,
+};
+
+void SetFlag(IR::IREmitter& ir, const IR::U1& inv_mask_bit, const IR::U1& src_bit, u32 index) {
+ switch (index) {
+ case 0:
+ return ir.SetZFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetZFlag(), src_bit)});
+ case 1:
+ return ir.SetSFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetSFlag(), src_bit)});
+ case 2:
+ return ir.SetCFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetCFlag(), src_bit)});
+ case 3:
+ return ir.SetOFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetOFlag(), src_bit)});
+ default:
+ throw LogicError("Unreachable R2P index");
+ }
+}
+
+void R2P(TranslatorVisitor& v, u64 insn, const IR::U32& mask) {
+ union {
+ u64 raw;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<40, 1, Mode> mode;
+ BitField<41, 2, u64> byte_selector;
+ } const r2p{insn};
+ const IR::U32 src{v.X(r2p.src_reg)};
+ const IR::U32 count{v.ir.Imm32(1)};
+ const bool pr_mode{r2p.mode == Mode::PR};
+ const u32 num_items{pr_mode ? 7U : 4U};
+ const u32 offset_base{static_cast<u32>(r2p.byte_selector) * 8};
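+    // For each destination predicate or flag, take bit (offset_base + index) of the source
+    // register; only targets whose mask bit is set are updated, the rest keep their old value.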
+ for (u32 index = 0; index < num_items; ++index) {
+ const IR::U32 offset{v.ir.Imm32(offset_base + index)};
+ const IR::U1 src_zero{v.ir.GetZeroFromOp(v.ir.BitFieldExtract(src, offset, count, false))};
+ const IR::U1 src_bit{v.ir.LogicalNot(src_zero)};
+ const IR::U32 mask_bfe{v.ir.BitFieldExtract(mask, v.ir.Imm32(index), count, false)};
+ const IR::U1 inv_mask_bit{v.ir.GetZeroFromOp(mask_bfe)};
+ if (pr_mode) {
+ const IR::Pred pred{index};
+ v.ir.SetPred(pred, IR::U1{v.ir.Select(inv_mask_bit, v.ir.GetPred(pred), src_bit)});
+ } else {
+ SetFlag(v.ir, inv_mask_bit, src_bit, index);
+ }
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::R2P_reg(u64 insn) {
+ R2P(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::R2P_cbuf(u64 insn) {
+ R2P(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::R2P_imm(u64 insn) {
+ R2P(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
new file mode 100644
index 000000000..20cb2674e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
@@ -0,0 +1,181 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class SpecialRegister : u64 {
+ SR_LANEID = 0,
+ SR_CLOCK = 1,
+ SR_VIRTCFG = 2,
+ SR_VIRTID = 3,
+ SR_PM0 = 4,
+ SR_PM1 = 5,
+ SR_PM2 = 6,
+ SR_PM3 = 7,
+ SR_PM4 = 8,
+ SR_PM5 = 9,
+ SR_PM6 = 10,
+ SR_PM7 = 11,
+ SR12 = 12,
+ SR13 = 13,
+ SR14 = 14,
+ SR_ORDERING_TICKET = 15,
+ SR_PRIM_TYPE = 16,
+ SR_INVOCATION_ID = 17,
+ SR_Y_DIRECTION = 18,
+ SR_THREAD_KILL = 19,
+ SM_SHADER_TYPE = 20,
+ SR_DIRECTCBEWRITEADDRESSLOW = 21,
+ SR_DIRECTCBEWRITEADDRESSHIGH = 22,
+ SR_DIRECTCBEWRITEENABLE = 23,
+ SR_MACHINE_ID_0 = 24,
+ SR_MACHINE_ID_1 = 25,
+ SR_MACHINE_ID_2 = 26,
+ SR_MACHINE_ID_3 = 27,
+ SR_AFFINITY = 28,
+ SR_INVOCATION_INFO = 29,
+ SR_WSCALEFACTOR_XY = 30,
+ SR_WSCALEFACTOR_Z = 31,
+ SR_TID = 32,
+ SR_TID_X = 33,
+ SR_TID_Y = 34,
+ SR_TID_Z = 35,
+ SR_CTA_PARAM = 36,
+ SR_CTAID_X = 37,
+ SR_CTAID_Y = 38,
+ SR_CTAID_Z = 39,
+ SR_NTID = 40,
+ SR_CirQueueIncrMinusOne = 41,
+ SR_NLATC = 42,
+ SR43 = 43,
+ SR_SM_SPA_VERSION = 44,
+ SR_MULTIPASSSHADERINFO = 45,
+ SR_LWINHI = 46,
+ SR_SWINHI = 47,
+ SR_SWINLO = 48,
+ SR_SWINSZ = 49,
+ SR_SMEMSZ = 50,
+ SR_SMEMBANKS = 51,
+ SR_LWINLO = 52,
+ SR_LWINSZ = 53,
+ SR_LMEMLOSZ = 54,
+ SR_LMEMHIOFF = 55,
+ SR_EQMASK = 56,
+ SR_LTMASK = 57,
+ SR_LEMASK = 58,
+ SR_GTMASK = 59,
+ SR_GEMASK = 60,
+ SR_REGALLOC = 61,
+ SR_BARRIERALLOC = 62,
+ SR63 = 63,
+ SR_GLOBALERRORSTATUS = 64,
+ SR65 = 65,
+ SR_WARPERRORSTATUS = 66,
+ SR_WARPERRORSTATUSCLEAR = 67,
+ SR68 = 68,
+ SR69 = 69,
+ SR70 = 70,
+ SR71 = 71,
+ SR_PM_HI0 = 72,
+ SR_PM_HI1 = 73,
+ SR_PM_HI2 = 74,
+ SR_PM_HI3 = 75,
+ SR_PM_HI4 = 76,
+ SR_PM_HI5 = 77,
+ SR_PM_HI6 = 78,
+ SR_PM_HI7 = 79,
+ SR_CLOCKLO = 80,
+ SR_CLOCKHI = 81,
+ SR_GLOBALTIMERLO = 82,
+ SR_GLOBALTIMERHI = 83,
+ SR84 = 84,
+ SR85 = 85,
+ SR86 = 86,
+ SR87 = 87,
+ SR88 = 88,
+ SR89 = 89,
+ SR90 = 90,
+ SR91 = 91,
+ SR92 = 92,
+ SR93 = 93,
+ SR94 = 94,
+ SR95 = 95,
+ SR_HWTASKID = 96,
+ SR_CIRCULARQUEUEENTRYINDEX = 97,
+ SR_CIRCULARQUEUEENTRYADDRESSLOW = 98,
+ SR_CIRCULARQUEUEENTRYADDRESSHIGH = 99,
+};
+
+[[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) {
+ switch (special_register) {
+ case SpecialRegister::SR_INVOCATION_ID:
+ return ir.InvocationId();
+ case SpecialRegister::SR_THREAD_KILL:
+ return IR::U32{ir.Select(ir.IsHelperInvocation(), ir.Imm32(-1), ir.Imm32(0))};
+ case SpecialRegister::SR_INVOCATION_INFO:
+ LOG_WARNING(Shader, "(STUBBED) SR_INVOCATION_INFO");
+ return ir.Imm32(0x00ff'0000);
+ case SpecialRegister::SR_TID: {
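+        // Pack the local invocation ID into one register: x in the low bits, y in bits
+        // [23:16] and z in bits [31:26].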
+ const IR::Value tid{ir.LocalInvocationId()};
+ return ir.BitFieldInsert(ir.BitFieldInsert(IR::U32{ir.CompositeExtract(tid, 0)},
+ IR::U32{ir.CompositeExtract(tid, 1)},
+ ir.Imm32(16), ir.Imm32(8)),
+ IR::U32{ir.CompositeExtract(tid, 2)}, ir.Imm32(26), ir.Imm32(6));
+ }
+ case SpecialRegister::SR_TID_X:
+ return ir.LocalInvocationIdX();
+ case SpecialRegister::SR_TID_Y:
+ return ir.LocalInvocationIdY();
+ case SpecialRegister::SR_TID_Z:
+ return ir.LocalInvocationIdZ();
+ case SpecialRegister::SR_CTAID_X:
+ return ir.WorkgroupIdX();
+ case SpecialRegister::SR_CTAID_Y:
+ return ir.WorkgroupIdY();
+ case SpecialRegister::SR_CTAID_Z:
+ return ir.WorkgroupIdZ();
+ case SpecialRegister::SR_WSCALEFACTOR_XY:
+ LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_XY");
+ return ir.Imm32(Common::BitCast<u32>(1.0f));
+ case SpecialRegister::SR_WSCALEFACTOR_Z:
+ LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_Z");
+ return ir.Imm32(Common::BitCast<u32>(1.0f));
+ case SpecialRegister::SR_LANEID:
+ return ir.LaneId();
+ case SpecialRegister::SR_EQMASK:
+ return ir.SubgroupEqMask();
+ case SpecialRegister::SR_LTMASK:
+ return ir.SubgroupLtMask();
+ case SpecialRegister::SR_LEMASK:
+ return ir.SubgroupLeMask();
+ case SpecialRegister::SR_GTMASK:
+ return ir.SubgroupGtMask();
+ case SpecialRegister::SR_GEMASK:
+ return ir.SubgroupGeMask();
+ case SpecialRegister::SR_Y_DIRECTION:
+ return ir.BitCast<IR::U32>(ir.YDirection());
+ case SpecialRegister::SR_AFFINITY:
+ LOG_WARNING(Shader, "(STUBBED) SR_AFFINITY");
+ return ir.Imm32(0); // This is the default value hardware returns.
+ default:
+ throw NotImplementedException("S2R special register {}", special_register);
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::S2R(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<20, 8, SpecialRegister> src_reg;
+ } const s2r{insn};
+
+ X(s2r.dest_reg, Read(ir, s2r.src_reg));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
new file mode 100644
index 000000000..7e26ab359
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -0,0 +1,283 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+
+[[noreturn]] static void ThrowNotImplemented(Opcode opcode) {
+ throw NotImplementedException("Instruction {} is not implemented", opcode);
+}
+
+void TranslatorVisitor::ATOM_cas(u64) {
+ ThrowNotImplemented(Opcode::ATOM_cas);
+}
+
+void TranslatorVisitor::ATOMS_cas(u64) {
+ ThrowNotImplemented(Opcode::ATOMS_cas);
+}
+
+void TranslatorVisitor::B2R(u64) {
+ ThrowNotImplemented(Opcode::B2R);
+}
+
+void TranslatorVisitor::BPT(u64) {
+ ThrowNotImplemented(Opcode::BPT);
+}
+
+void TranslatorVisitor::BRA(u64) {
+ ThrowNotImplemented(Opcode::BRA);
+}
+
+void TranslatorVisitor::BRK(u64) {
+ ThrowNotImplemented(Opcode::BRK);
+}
+
+void TranslatorVisitor::CAL() {
+ // CAL is a no-op
+}
+
+void TranslatorVisitor::CCTL(u64) {
+ ThrowNotImplemented(Opcode::CCTL);
+}
+
+void TranslatorVisitor::CCTLL(u64) {
+ ThrowNotImplemented(Opcode::CCTLL);
+}
+
+void TranslatorVisitor::CONT(u64) {
+ ThrowNotImplemented(Opcode::CONT);
+}
+
+void TranslatorVisitor::CS2R(u64) {
+ ThrowNotImplemented(Opcode::CS2R);
+}
+
+void TranslatorVisitor::FCHK_reg(u64) {
+ ThrowNotImplemented(Opcode::FCHK_reg);
+}
+
+void TranslatorVisitor::FCHK_cbuf(u64) {
+ ThrowNotImplemented(Opcode::FCHK_cbuf);
+}
+
+void TranslatorVisitor::FCHK_imm(u64) {
+ ThrowNotImplemented(Opcode::FCHK_imm);
+}
+
+void TranslatorVisitor::GETCRSPTR(u64) {
+ ThrowNotImplemented(Opcode::GETCRSPTR);
+}
+
+void TranslatorVisitor::GETLMEMBASE(u64) {
+ ThrowNotImplemented(Opcode::GETLMEMBASE);
+}
+
+void TranslatorVisitor::IDE(u64) {
+ ThrowNotImplemented(Opcode::IDE);
+}
+
+void TranslatorVisitor::IDP_reg(u64) {
+ ThrowNotImplemented(Opcode::IDP_reg);
+}
+
+void TranslatorVisitor::IDP_imm(u64) {
+ ThrowNotImplemented(Opcode::IDP_imm);
+}
+
+void TranslatorVisitor::IMAD_reg(u64) {
+ ThrowNotImplemented(Opcode::IMAD_reg);
+}
+
+void TranslatorVisitor::IMAD_rc(u64) {
+ ThrowNotImplemented(Opcode::IMAD_rc);
+}
+
+void TranslatorVisitor::IMAD_cr(u64) {
+ ThrowNotImplemented(Opcode::IMAD_cr);
+}
+
+void TranslatorVisitor::IMAD_imm(u64) {
+ ThrowNotImplemented(Opcode::IMAD_imm);
+}
+
+void TranslatorVisitor::IMAD32I(u64) {
+ ThrowNotImplemented(Opcode::IMAD32I);
+}
+
+void TranslatorVisitor::IMADSP_reg(u64) {
+ ThrowNotImplemented(Opcode::IMADSP_reg);
+}
+
+void TranslatorVisitor::IMADSP_rc(u64) {
+ ThrowNotImplemented(Opcode::IMADSP_rc);
+}
+
+void TranslatorVisitor::IMADSP_cr(u64) {
+ ThrowNotImplemented(Opcode::IMADSP_cr);
+}
+
+void TranslatorVisitor::IMADSP_imm(u64) {
+ ThrowNotImplemented(Opcode::IMADSP_imm);
+}
+
+void TranslatorVisitor::IMUL_reg(u64) {
+ ThrowNotImplemented(Opcode::IMUL_reg);
+}
+
+void TranslatorVisitor::IMUL_cbuf(u64) {
+ ThrowNotImplemented(Opcode::IMUL_cbuf);
+}
+
+void TranslatorVisitor::IMUL_imm(u64) {
+ ThrowNotImplemented(Opcode::IMUL_imm);
+}
+
+void TranslatorVisitor::IMUL32I(u64) {
+ ThrowNotImplemented(Opcode::IMUL32I);
+}
+
+void TranslatorVisitor::JCAL(u64) {
+ ThrowNotImplemented(Opcode::JCAL);
+}
+
+void TranslatorVisitor::JMP(u64) {
+ ThrowNotImplemented(Opcode::JMP);
+}
+
+void TranslatorVisitor::KIL() {
+ // KIL is a no-op
+}
+
+void TranslatorVisitor::LD(u64) {
+ ThrowNotImplemented(Opcode::LD);
+}
+
+void TranslatorVisitor::LEPC(u64) {
+ ThrowNotImplemented(Opcode::LEPC);
+}
+
+void TranslatorVisitor::LONGJMP(u64) {
+ ThrowNotImplemented(Opcode::LONGJMP);
+}
+
+void TranslatorVisitor::NOP(u64) {
+    // NOP is a no-op
+}
+
+void TranslatorVisitor::PBK() {
+ // PBK is a no-op
+}
+
+void TranslatorVisitor::PCNT() {
+ // PCNT is a no-op
+}
+
+void TranslatorVisitor::PEXIT(u64) {
+ ThrowNotImplemented(Opcode::PEXIT);
+}
+
+void TranslatorVisitor::PLONGJMP(u64) {
+ ThrowNotImplemented(Opcode::PLONGJMP);
+}
+
+void TranslatorVisitor::PRET(u64) {
+ ThrowNotImplemented(Opcode::PRET);
+}
+
+void TranslatorVisitor::PRMT_reg(u64) {
+ ThrowNotImplemented(Opcode::PRMT_reg);
+}
+
+void TranslatorVisitor::PRMT_rc(u64) {
+ ThrowNotImplemented(Opcode::PRMT_rc);
+}
+
+void TranslatorVisitor::PRMT_cr(u64) {
+ ThrowNotImplemented(Opcode::PRMT_cr);
+}
+
+void TranslatorVisitor::PRMT_imm(u64) {
+ ThrowNotImplemented(Opcode::PRMT_imm);
+}
+
+void TranslatorVisitor::R2B(u64) {
+ ThrowNotImplemented(Opcode::R2B);
+}
+
+void TranslatorVisitor::RAM(u64) {
+ ThrowNotImplemented(Opcode::RAM);
+}
+
+void TranslatorVisitor::RET(u64) {
+ ThrowNotImplemented(Opcode::RET);
+}
+
+void TranslatorVisitor::RTT(u64) {
+ ThrowNotImplemented(Opcode::RTT);
+}
+
+void TranslatorVisitor::SAM(u64) {
+ ThrowNotImplemented(Opcode::SAM);
+}
+
+void TranslatorVisitor::SETCRSPTR(u64) {
+ ThrowNotImplemented(Opcode::SETCRSPTR);
+}
+
+void TranslatorVisitor::SETLMEMBASE(u64) {
+ ThrowNotImplemented(Opcode::SETLMEMBASE);
+}
+
+void TranslatorVisitor::SSY() {
+ // SSY is a no-op
+}
+
+void TranslatorVisitor::ST(u64) {
+ ThrowNotImplemented(Opcode::ST);
+}
+
+void TranslatorVisitor::STP(u64) {
+ ThrowNotImplemented(Opcode::STP);
+}
+
+void TranslatorVisitor::SUATOM_cas(u64) {
+ ThrowNotImplemented(Opcode::SUATOM_cas);
+}
+
+void TranslatorVisitor::SYNC(u64) {
+ ThrowNotImplemented(Opcode::SYNC);
+}
+
+void TranslatorVisitor::TXA(u64) {
+ ThrowNotImplemented(Opcode::TXA);
+}
+
+void TranslatorVisitor::VABSDIFF(u64) {
+ ThrowNotImplemented(Opcode::VABSDIFF);
+}
+
+void TranslatorVisitor::VABSDIFF4(u64) {
+ ThrowNotImplemented(Opcode::VABSDIFF4);
+}
+
+void TranslatorVisitor::VADD(u64) {
+ ThrowNotImplemented(Opcode::VADD);
+}
+
+void TranslatorVisitor::VSET(u64) {
+ ThrowNotImplemented(Opcode::VSET);
+}
+
+void TranslatorVisitor::VSHL(u64) {
+ ThrowNotImplemented(Opcode::VSHL);
+}
+
+void TranslatorVisitor::VSHR(u64) {
+ ThrowNotImplemented(Opcode::VSHR);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp
new file mode 100644
index 000000000..01cfad88d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp
@@ -0,0 +1,45 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void OUT(TranslatorVisitor& v, u64 insn, IR::U32 stream_index) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> output_reg; // Not needed on host
+ BitField<39, 1, u64> emit;
+ BitField<40, 1, u64> cut;
+ } const out{insn};
+
+ stream_index = v.ir.BitwiseAnd(stream_index, v.ir.Imm32(0b11));
+
+ if (out.emit != 0) {
+ v.ir.EmitVertex(stream_index);
+ }
+ if (out.cut != 0) {
+ v.ir.EndPrimitive(stream_index);
+ }
+ // Host doesn't need the output register, but we can write to it to avoid undefined reads
+ v.X(out.dest_reg, v.ir.Imm32(0));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::OUT_reg(u64 insn) {
+ OUT(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::OUT_cbuf(u64 insn) {
+ OUT(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::OUT_imm(u64 insn) {
+ OUT(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp
new file mode 100644
index 000000000..b4767afb5
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp
@@ -0,0 +1,46 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Mode : u64 {
+ Default,
+ CovMask,
+ Covered,
+ Offset,
+ CentroidOffset,
+ MyIndex,
+};
+} // Anonymous namespace
+
+void TranslatorVisitor::PIXLD(u64 insn) {
+ union {
+ u64 raw;
+ BitField<31, 3, Mode> mode;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> addr_reg;
+ BitField<20, 8, s64> addr_offset;
+ BitField<45, 3, IR::Pred> dest_pred;
+ } const pixld{insn};
+
+ if (pixld.dest_pred != IR::Pred::PT) {
+ throw NotImplementedException("Destination predicate");
+ }
+ if (pixld.addr_reg != IR::Reg::RZ || pixld.addr_offset != 0) {
+ throw NotImplementedException("Non-zero source register");
+ }
+ switch (pixld.mode) {
+ case Mode::MyIndex:
+ X(pixld.dest_reg, ir.SampleId());
+ break;
+ default:
+ throw NotImplementedException("Mode {}", pixld.mode.Value());
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp
new file mode 100644
index 000000000..75d1fa8c1
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp
@@ -0,0 +1,38 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+void TranslatorVisitor::PSETP(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 3, IR::Pred> dest_pred_b;
+ BitField<3, 3, IR::Pred> dest_pred_a;
+ BitField<12, 3, IR::Pred> pred_a;
+ BitField<15, 1, u64> neg_pred_a;
+ BitField<24, 2, BooleanOp> bop_1;
+ BitField<29, 3, IR::Pred> pred_b;
+ BitField<32, 1, u64> neg_pred_b;
+ BitField<39, 3, IR::Pred> pred_c;
+ BitField<42, 1, u64> neg_pred_c;
+ BitField<45, 2, BooleanOp> bop_2;
+ } const pset{insn};
+
+ const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)};
+ const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)};
+ const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)};
+
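+    // PSETP writes two predicates: one from the combination using pred_a and one from the
+    // combination using its negation.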
+ const IR::U1 lhs_a{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)};
+ const IR::U1 lhs_b{PredicateCombine(ir, ir.LogicalNot(pred_a), pred_b, pset.bop_1)};
+ const IR::U1 result_a{PredicateCombine(ir, lhs_a, pred_c, pset.bop_2)};
+ const IR::U1 result_b{PredicateCombine(ir, lhs_b, pred_c, pset.bop_2)};
+
+ ir.SetPred(pset.dest_pred_a, result_a);
+ ir.SetPred(pset.dest_pred_b, result_b);
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp
new file mode 100644
index 000000000..b02789874
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp
@@ -0,0 +1,53 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+void TranslatorVisitor::PSET(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<12, 3, IR::Pred> pred_a;
+ BitField<15, 1, u64> neg_pred_a;
+ BitField<24, 2, BooleanOp> bop_1;
+ BitField<29, 3, IR::Pred> pred_b;
+ BitField<32, 1, u64> neg_pred_b;
+ BitField<39, 3, IR::Pred> pred_c;
+ BitField<42, 1, u64> neg_pred_c;
+ BitField<44, 1, u64> bf;
+ BitField<45, 2, BooleanOp> bop_2;
+ BitField<47, 1, u64> cc;
+ } const pset{insn};
+
+ const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)};
+ const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)};
+ const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)};
+
+ const IR::U1 res_1{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)};
+ const IR::U1 res_2{PredicateCombine(ir, res_1, pred_c, pset.bop_2)};
+
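+    // With .BF the boolean is materialized as float 1.0 (0x3f800000), otherwise as 0xffffffff.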
+ const IR::U32 true_result{pset.bf != 0 ? ir.Imm32(0x3f800000) : ir.Imm32(-1)};
+ const IR::U32 zero{ir.Imm32(0)};
+
+ const IR::U32 result{ir.Select(res_2, true_result, zero)};
+
+ X(pset.dest_reg, result);
+ if (pset.cc != 0) {
+ const IR::U1 is_zero{ir.IEqual(result, zero)};
+ SetZFlag(is_zero);
+ if (pset.bf != 0) {
+ ResetSFlag();
+ } else {
+ SetSFlag(ir.LogicalNot(is_zero));
+ }
+ ResetOFlag();
+ ResetCFlag();
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp
new file mode 100644
index 000000000..93baa75a9
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp
@@ -0,0 +1,44 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+void SEL(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ } const sel{insn};
+
+    const IR::U1 pred{v.ir.GetPred(sel.pred)};
+ IR::U32 op_a{v.X(sel.src_reg)};
+ IR::U32 op_b{src};
+ if (sel.neg_pred != 0) {
+ std::swap(op_a, op_b);
+ }
+ const IR::U32 result{v.ir.Select(pred, op_a, op_b)};
+
+ v.X(sel.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::SEL_reg(u64 insn) {
+ SEL(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::SEL_cbuf(u64 insn) {
+ SEL(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::SEL_imm(u64 insn) {
+ SEL(*this, insn, GetImm20(insn));
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp
new file mode 100644
index 000000000..63b588ad4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp
@@ -0,0 +1,205 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+#include <bit>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Type : u64 {
+ _1D,
+ BUFFER_1D,
+ ARRAY_1D,
+ _2D,
+ ARRAY_2D,
+ _3D,
+};
+
+enum class Size : u64 {
+ U32,
+ S32,
+ U64,
+ S64,
+ F32FTZRN,
+ F16x2FTZRN,
+ SD32,
+ SD64,
+};
+
+enum class AtomicOp : u64 {
+ ADD,
+ MIN,
+ MAX,
+ INC,
+ DEC,
+ AND,
+ OR,
+ XOR,
+ EXCH,
+};
+
+enum class Clamp : u64 {
+ IGN,
+ Default,
+ TRAP,
+};
+
+TextureType GetType(Type type) {
+ switch (type) {
+ case Type::_1D:
+ return TextureType::Color1D;
+ case Type::BUFFER_1D:
+ return TextureType::Buffer;
+ case Type::ARRAY_1D:
+ return TextureType::ColorArray1D;
+ case Type::_2D:
+ return TextureType::Color2D;
+ case Type::ARRAY_2D:
+ return TextureType::ColorArray2D;
+ case Type::_3D:
+ return TextureType::Color3D;
+ }
+ throw NotImplementedException("Invalid type {}", type);
+}
+
+IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) {
+ switch (type) {
+ case Type::_1D:
+ case Type::BUFFER_1D:
+ return v.X(reg);
+ case Type::_2D:
+ return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1));
+ case Type::_3D:
+ return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
+ default:
+ break;
+ }
+ throw NotImplementedException("Invalid type {}", type);
+}
+
+IR::Value ApplyAtomicOp(IR::IREmitter& ir, const IR::U32& handle, const IR::Value& coords,
+ const IR::Value& op_b, IR::TextureInstInfo info, AtomicOp op,
+ bool is_signed) {
+ switch (op) {
+ case AtomicOp::ADD:
+ return ir.ImageAtomicIAdd(handle, coords, op_b, info);
+ case AtomicOp::MIN:
+ return ir.ImageAtomicIMin(handle, coords, op_b, is_signed, info);
+ case AtomicOp::MAX:
+ return ir.ImageAtomicIMax(handle, coords, op_b, is_signed, info);
+ case AtomicOp::INC:
+ return ir.ImageAtomicInc(handle, coords, op_b, info);
+ case AtomicOp::DEC:
+ return ir.ImageAtomicDec(handle, coords, op_b, info);
+ case AtomicOp::AND:
+ return ir.ImageAtomicAnd(handle, coords, op_b, info);
+ case AtomicOp::OR:
+ return ir.ImageAtomicOr(handle, coords, op_b, info);
+ case AtomicOp::XOR:
+ return ir.ImageAtomicXor(handle, coords, op_b, info);
+ case AtomicOp::EXCH:
+ return ir.ImageAtomicExchange(handle, coords, op_b, info);
+ default:
+ throw NotImplementedException("Atomic Operation {}", op);
+ }
+}
+
+ImageFormat Format(Size size) {
+ switch (size) {
+ case Size::U32:
+ case Size::S32:
+ case Size::SD32:
+ return ImageFormat::R32_UINT;
+ default:
+ break;
+ }
+ throw NotImplementedException("Invalid size {}", size);
+}
+
+bool IsSizeInt32(Size size) {
+ switch (size) {
+ case Size::U32:
+ case Size::S32:
+ case Size::SD32:
+ return true;
+ default:
+ return false;
+ }
+}
+
+void ImageAtomOp(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg, IR::Reg coord_reg,
+ IR::Reg bindless_reg, AtomicOp op, Clamp clamp, Size size, Type type,
+ u64 bound_offset, bool is_bindless, bool write_result) {
+ if (clamp != Clamp::IGN) {
+ throw NotImplementedException("Clamp {}", clamp);
+ }
+ if (!IsSizeInt32(size)) {
+ throw NotImplementedException("Size {}", size);
+ }
+ const bool is_signed{size == Size::S32};
+ const ImageFormat format{Format(size)};
+ const TextureType tex_type{GetType(type)};
+ const IR::Value coords{MakeCoords(v, coord_reg, type)};
+
+ const IR::U32 handle{is_bindless != 0 ? v.X(bindless_reg)
+ : v.ir.Imm32(static_cast<u32>(bound_offset * 4))};
+ IR::TextureInstInfo info{};
+ info.type.Assign(tex_type);
+ info.image_format.Assign(format);
+
+ // TODO: float/64-bit operand
+ const IR::Value op_b{v.X(operand_reg)};
+ const IR::Value color{ApplyAtomicOp(v.ir, handle, coords, op_b, info, op, is_signed)};
+
+ if (write_result) {
+ v.X(dest_reg, IR::U32{color});
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::SUATOM(u64 insn) {
+ union {
+ u64 raw;
+ BitField<54, 1, u64> is_bindless;
+ BitField<29, 4, AtomicOp> op;
+ BitField<33, 3, Type> type;
+ BitField<51, 3, Size> size;
+ BitField<49, 2, Clamp> clamp;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<20, 8, IR::Reg> operand_reg;
+ BitField<36, 13, u64> bound_offset; // !is_bindless
+ BitField<39, 8, IR::Reg> bindless_reg; // is_bindless
+ } const suatom{insn};
+
+ ImageAtomOp(*this, suatom.dest_reg, suatom.operand_reg, suatom.coord_reg, suatom.bindless_reg,
+ suatom.op, suatom.clamp, suatom.size, suatom.type, suatom.bound_offset,
+ suatom.is_bindless != 0, true);
+}
+
+void TranslatorVisitor::SURED(u64 insn) {
+ // TODO: confirm offsets
+ union {
+ u64 raw;
+ BitField<51, 1, u64> is_bound;
+ BitField<21, 3, AtomicOp> op;
+ BitField<33, 3, Type> type;
+ BitField<20, 3, Size> size;
+ BitField<49, 2, Clamp> clamp;
+ BitField<0, 8, IR::Reg> operand_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<36, 13, u64> bound_offset; // is_bound
+ BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
+ } const sured{insn};
+ ImageAtomOp(*this, IR::Reg::RZ, sured.operand_reg, sured.coord_reg, sured.bindless_reg,
+ sured.op, sured.clamp, sured.size, sured.type, sured.bound_offset,
+ sured.is_bound == 0, false);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp
new file mode 100644
index 000000000..681220a8d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp
@@ -0,0 +1,281 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+#include <bit>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Type : u64 {
+ _1D,
+ BUFFER_1D,
+ ARRAY_1D,
+ _2D,
+ ARRAY_2D,
+ _3D,
+};
+
+constexpr unsigned R = 1 << 0;
+constexpr unsigned G = 1 << 1;
+constexpr unsigned B = 1 << 2;
+constexpr unsigned A = 1 << 3;
+
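+// RGBA component bitmask for each value of the 4-bit .P swizzle field; bit n enables
+// component n of the transferred texel.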
+constexpr std::array MASK{
+ 0U, //
+ R, //
+ G, //
+ R | G, //
+ B, //
+ R | B, //
+ G | B, //
+ R | G | B, //
+ A, //
+ R | A, //
+ G | A, //
+ R | G | A, //
+ B | A, //
+ R | B | A, //
+ G | B | A, //
+ R | G | B | A, //
+};
+
+enum class Size : u64 {
+ U8,
+ S8,
+ U16,
+ S16,
+ B32,
+ B64,
+ B128,
+};
+
+enum class Clamp : u64 {
+ IGN,
+ Default,
+ TRAP,
+};
+
+// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cache-operators
+enum class LoadCache : u64 {
+ CA, // Cache at all levels, likely to be accessed again
+ CG, // Cache at global level (L2 and below, not L1)
+ CI, // ???
+ CV, // Don't cache and fetch again (volatile)
+};
+
+enum class StoreCache : u64 {
+ WB, // Cache write-back all coherent levels
+ CG, // Cache at global level (L2 and below, not L1)
+ CS, // Cache streaming, likely to be accessed once
+ WT, // Cache write-through (to system memory, volatile?)
+};
+
+ImageFormat Format(Size size) {
+ switch (size) {
+ case Size::U8:
+ return ImageFormat::R8_UINT;
+ case Size::S8:
+ return ImageFormat::R8_SINT;
+ case Size::U16:
+ return ImageFormat::R16_UINT;
+ case Size::S16:
+ return ImageFormat::R16_SINT;
+ case Size::B32:
+ return ImageFormat::R32_UINT;
+ case Size::B64:
+ return ImageFormat::R32G32_UINT;
+ case Size::B128:
+ return ImageFormat::R32G32B32A32_UINT;
+ }
+ throw NotImplementedException("Invalid size {}", size);
+}
+
+int SizeInRegs(Size size) {
+ switch (size) {
+ case Size::U8:
+ case Size::S8:
+ case Size::U16:
+ case Size::S16:
+ case Size::B32:
+ return 1;
+ case Size::B64:
+ return 2;
+ case Size::B128:
+ return 4;
+ }
+ throw NotImplementedException("Invalid size {}", size);
+}
+
+TextureType GetType(Type type) {
+ switch (type) {
+ case Type::_1D:
+ return TextureType::Color1D;
+ case Type::BUFFER_1D:
+ return TextureType::Buffer;
+ case Type::ARRAY_1D:
+ return TextureType::ColorArray1D;
+ case Type::_2D:
+ return TextureType::Color2D;
+ case Type::ARRAY_2D:
+ return TextureType::ColorArray2D;
+ case Type::_3D:
+ return TextureType::Color3D;
+ }
+ throw NotImplementedException("Invalid type {}", type);
+}
+
+IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) {
+ const auto array{[&](int index) {
+ return v.ir.BitFieldExtract(v.X(reg + index), v.ir.Imm32(0), v.ir.Imm32(16));
+ }};
+ switch (type) {
+ case Type::_1D:
+ case Type::BUFFER_1D:
+ return v.X(reg);
+ case Type::ARRAY_1D:
+ return v.ir.CompositeConstruct(v.X(reg), array(1));
+ case Type::_2D:
+ return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1));
+ case Type::ARRAY_2D:
+ return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), array(2));
+ case Type::_3D:
+ return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
+ }
+ throw NotImplementedException("Invalid type {}", type);
+}
+
+unsigned SwizzleMask(u64 swizzle) {
+ if (swizzle == 0 || swizzle >= MASK.size()) {
+ throw NotImplementedException("Invalid swizzle {}", swizzle);
+ }
+ return MASK[swizzle];
+}
+
+IR::Value MakeColor(IR::IREmitter& ir, IR::Reg reg, int num_regs) {
+ std::array<IR::U32, 4> colors;
+ for (int i = 0; i < num_regs; ++i) {
+ colors[static_cast<size_t>(i)] = ir.GetReg(reg + i);
+ }
+ for (int i = num_regs; i < 4; ++i) {
+ colors[static_cast<size_t>(i)] = ir.Imm32(0);
+ }
+ return ir.CompositeConstruct(colors[0], colors[1], colors[2], colors[3]);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::SULD(u64 insn) {
+ union {
+ u64 raw;
+ BitField<51, 1, u64> is_bound;
+ BitField<52, 1, u64> d;
+ BitField<23, 1, u64> ba;
+ BitField<33, 3, Type> type;
+ BitField<24, 2, LoadCache> cache;
+ BitField<20, 3, Size> size; // .D
+ BitField<20, 4, u64> swizzle; // .P
+ BitField<49, 2, Clamp> clamp;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<36, 13, u64> bound_offset; // is_bound
+ BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
+ } const suld{insn};
+
+ if (suld.clamp != Clamp::IGN) {
+ throw NotImplementedException("Clamp {}", suld.clamp.Value());
+ }
+ if (suld.cache != LoadCache::CA && suld.cache != LoadCache::CG) {
+ throw NotImplementedException("Cache {}", suld.cache.Value());
+ }
+ const bool is_typed{suld.d != 0};
+ if (is_typed && suld.ba != 0) {
+ throw NotImplementedException("BA");
+ }
+
+ const ImageFormat format{is_typed ? Format(suld.size) : ImageFormat::Typeless};
+ const TextureType type{GetType(suld.type)};
+ const IR::Value coords{MakeCoords(*this, suld.coord_reg, suld.type)};
+ const IR::U32 handle{suld.is_bound != 0 ? ir.Imm32(static_cast<u32>(suld.bound_offset * 4))
+ : X(suld.bindless_reg)};
+ IR::TextureInstInfo info{};
+ info.type.Assign(type);
+ info.image_format.Assign(format);
+
+ const IR::Value result{ir.ImageRead(handle, coords, info)};
+ IR::Reg dest_reg{suld.dest_reg};
+ if (is_typed) {
+ const int num_regs{SizeInRegs(suld.size)};
+ for (int i = 0; i < num_regs; ++i) {
+ X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
+ }
+ } else {
+ const unsigned mask{SwizzleMask(suld.swizzle)};
+ const int bits{std::popcount(mask)};
+ if (!IR::IsAligned(dest_reg, bits == 3 ? 4 : static_cast<size_t>(bits))) {
+ throw NotImplementedException("Unaligned destination register");
+ }
+ for (unsigned component = 0; component < 4; ++component) {
+ if (((mask >> component) & 1) == 0) {
+ continue;
+ }
+ X(dest_reg, IR::U32{ir.CompositeExtract(result, component)});
+ ++dest_reg;
+ }
+ }
+}
+
+void TranslatorVisitor::SUST(u64 insn) {
+ union {
+ u64 raw;
+ BitField<51, 1, u64> is_bound;
+ BitField<52, 1, u64> d;
+ BitField<23, 1, u64> ba;
+ BitField<33, 3, Type> type;
+ BitField<24, 2, StoreCache> cache;
+ BitField<20, 3, Size> size; // .D
+ BitField<20, 4, u64> swizzle; // .P
+ BitField<49, 2, Clamp> clamp;
+ BitField<0, 8, IR::Reg> data_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<36, 13, u64> bound_offset; // is_bound
+ BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
+ } const sust{insn};
+
+ if (sust.clamp != Clamp::IGN) {
+ throw NotImplementedException("Clamp {}", sust.clamp.Value());
+ }
+ if (sust.cache != StoreCache::WB && sust.cache != StoreCache::CG) {
+ throw NotImplementedException("Cache {}", sust.cache.Value());
+ }
+ const bool is_typed{sust.d != 0};
+ if (is_typed && sust.ba != 0) {
+ throw NotImplementedException("BA");
+ }
+ const ImageFormat format{is_typed ? Format(sust.size) : ImageFormat::Typeless};
+ const TextureType type{GetType(sust.type)};
+ const IR::Value coords{MakeCoords(*this, sust.coord_reg, sust.type)};
+ const IR::U32 handle{sust.is_bound != 0 ? ir.Imm32(static_cast<u32>(sust.bound_offset * 4))
+ : X(sust.bindless_reg)};
+ IR::TextureInstInfo info{};
+ info.type.Assign(type);
+ info.image_format.Assign(format);
+
+ IR::Value color;
+ if (is_typed) {
+ color = MakeColor(ir, sust.data_reg, SizeInRegs(sust.size));
+ } else {
+ const unsigned mask{SwizzleMask(sust.swizzle)};
+ if (mask != 0xf) {
+ throw NotImplementedException("Non-full mask");
+ }
+ color = MakeColor(ir, sust.data_reg, 4);
+ }
+ ir.ImageWrite(handle, coords, color, info);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
new file mode 100644
index 000000000..0046b5edd
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
@@ -0,0 +1,236 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Blod : u64 {
+ None,
+ LZ,
+ LB,
+ LL,
+ INVALIDBLOD4,
+ INVALIDBLOD5,
+ LBA,
+ LLA,
+};
+
+enum class TextureType : u64 {
+ _1D,
+ ARRAY_1D,
+ _2D,
+ ARRAY_2D,
+ _3D,
+ ARRAY_3D,
+ CUBE,
+ ARRAY_CUBE,
+};
+
+Shader::TextureType GetType(TextureType type) {
+ switch (type) {
+ case TextureType::_1D:
+ return Shader::TextureType::Color1D;
+ case TextureType::ARRAY_1D:
+ return Shader::TextureType::ColorArray1D;
+ case TextureType::_2D:
+ return Shader::TextureType::Color2D;
+ case TextureType::ARRAY_2D:
+ return Shader::TextureType::ColorArray2D;
+ case TextureType::_3D:
+ return Shader::TextureType::Color3D;
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return Shader::TextureType::ColorCube;
+ case TextureType::ARRAY_CUBE:
+ return Shader::TextureType::ColorArrayCube;
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
+ const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }};
+ switch (type) {
+ case TextureType::_1D:
+ return v.F(reg);
+ case TextureType::ARRAY_1D:
+ return v.ir.CompositeConstruct(v.F(reg + 1), read_array());
+ case TextureType::_2D:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
+ case TextureType::ARRAY_2D:
+ return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array());
+ case TextureType::_3D:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
+ case TextureType::ARRAY_CUBE:
+ return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array());
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) {
+ switch (blod) {
+ case Blod::None:
+ return v.ir.Imm32(0.0f);
+ case Blod::LZ:
+ return v.ir.Imm32(0.0f);
+ case Blod::LB:
+ case Blod::LL:
+ case Blod::LBA:
+ case Blod::LLA:
+ return v.F(reg++);
+ case Blod::INVALIDBLOD4:
+ case Blod::INVALIDBLOD5:
+ break;
+ }
+ throw NotImplementedException("Invalid blod {}", blod);
+}
+
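+// AOFFI offsets are packed into one register as signed 4-bit fields: x at bit 0, y at bit 4 and
+// z at bit 8, depending on the texture dimension.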
+IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
+ const IR::U32 value{v.X(reg++)};
+ switch (type) {
+ case TextureType::_1D:
+ case TextureType::ARRAY_1D:
+ return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true);
+ case TextureType::_2D:
+ case TextureType::ARRAY_2D:
+ return v.ir.CompositeConstruct(
+ v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true));
+ case TextureType::_3D:
+ case TextureType::ARRAY_3D:
+ return v.ir.CompositeConstruct(
+ v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true));
+ case TextureType::CUBE:
+ case TextureType::ARRAY_CUBE:
+ throw NotImplementedException("Illegal offset on CUBE sample");
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+bool HasExplicitLod(Blod blod) {
+ switch (blod) {
+ case Blod::LL:
+ case Blod::LLA:
+ case Blod::LZ:
+ return true;
+ default:
+ return false;
+ }
+}
+
+void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc,
+ std::optional<u32> cbuf_offset) {
+ union {
+ u64 raw;
+ BitField<35, 1, u64> ndv;
+ BitField<49, 1, u64> nodep;
+ BitField<50, 1, u64> dc;
+ BitField<51, 3, IR::Pred> sparse_pred;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<20, 8, IR::Reg> meta_reg;
+ BitField<28, 3, TextureType> type;
+ BitField<31, 4, u64> mask;
+ } const tex{insn};
+
+ if (lc) {
+ throw NotImplementedException("LC");
+ }
+ const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)};
+
+ IR::Reg meta_reg{tex.meta_reg};
+ IR::Value handle;
+ IR::Value offset;
+ IR::F32 dref;
+ IR::F32 lod_clamp;
+ if (cbuf_offset) {
+ handle = v.ir.Imm32(*cbuf_offset);
+ } else {
+ handle = v.X(meta_reg++);
+ }
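+    // The meta register supplies, in this order: the bindless handle (TEX.B only), the LOD or
+    // bias value (when used), the AOFFI offsets and the depth compare reference.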
+ const IR::F32 lod{MakeLod(v, meta_reg, blod)};
+ if (aoffi) {
+ offset = MakeOffset(v, meta_reg, tex.type);
+ }
+ if (tex.dc != 0) {
+ dref = v.F(meta_reg++);
+ }
+ IR::TextureInstInfo info{};
+ info.type.Assign(GetType(tex.type));
+ info.is_depth.Assign(tex.dc != 0 ? 1 : 0);
+ info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0);
+ info.has_lod_clamp.Assign(lc ? 1 : 0);
+
+ const IR::Value sample{[&]() -> IR::Value {
+ if (tex.dc == 0) {
+ if (HasExplicitLod(blod)) {
+ return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, info);
+ } else {
+ return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info);
+ }
+ }
+ if (HasExplicitLod(blod)) {
+ return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, info);
+ } else {
+ return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp,
+ info);
+ }
+ }()};
+
+ IR::Reg dest_reg{tex.dest_reg};
+ for (int element = 0; element < 4; ++element) {
+ if (((tex.mask >> element) & 1) == 0) {
+ continue;
+ }
+ IR::F32 value;
+ if (tex.dc != 0) {
+ value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f);
+ } else {
+ value = IR::F32{v.ir.CompositeExtract(sample, static_cast<size_t>(element))};
+ }
+ v.F(dest_reg, value);
+ ++dest_reg;
+ }
+ if (tex.sparse_pred != IR::Pred::PT) {
+ v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TEX(u64 insn) {
+ union {
+ u64 raw;
+ BitField<54, 1, u64> aoffi;
+ BitField<55, 3, Blod> blod;
+ BitField<58, 1, u64> lc;
+ BitField<36, 13, u64> cbuf_offset;
+ } const tex{insn};
+
+ Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast<u32>(tex.cbuf_offset * 4));
+}
+
+void TranslatorVisitor::TEX_b(u64 insn) {
+ union {
+ u64 raw;
+ BitField<36, 1, u64> aoffi;
+ BitField<37, 3, Blod> blod;
+ BitField<40, 1, u64> lc;
+ } const tex{insn};
+
+ Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
new file mode 100644
index 000000000..154e7f1a1
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
@@ -0,0 +1,266 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <utility>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Precision : u64 {
+ F16,
+ F32,
+};
+
+union Encoding {
+ u64 raw;
+ BitField<59, 1, Precision> precision;
+ BitField<53, 4, u64> encoding;
+ BitField<49, 1, u64> nodep;
+ BitField<28, 8, IR::Reg> dest_reg_b;
+ BitField<0, 8, IR::Reg> dest_reg_a;
+ BitField<8, 8, IR::Reg> src_reg_a;
+ BitField<20, 8, IR::Reg> src_reg_b;
+ BitField<36, 13, u64> cbuf_offset;
+ BitField<50, 3, u64> swizzle;
+};
+
+constexpr unsigned R = 1;
+constexpr unsigned G = 2;
+constexpr unsigned B = 4;
+constexpr unsigned A = 8;
+
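+// Destination component masks selected by the swizzle field: RG_LUT is used when dest_reg_b is
+// RZ (at most two destination registers), RGBA_LUT otherwise. See Swizzle() below.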
+constexpr std::array RG_LUT{
+ R, //
+ G, //
+ B, //
+ A, //
+ R | G, //
+ R | A, //
+ G | A, //
+ B | A, //
+};
+
+constexpr std::array RGBA_LUT{
+ R | G | B, //
+ R | G | A, //
+ R | B | A, //
+ G | B | A, //
+ R | G | B | A, //
+};
+
+void CheckAlignment(IR::Reg reg, size_t alignment) {
+ if (!IR::IsAligned(reg, alignment)) {
+ throw NotImplementedException("Unaligned source register {}", reg);
+ }
+}
+
+template <typename... Args>
+IR::Value Composite(TranslatorVisitor& v, Args... regs) {
+ return v.ir.CompositeConstruct(v.F(regs)...);
+}
+
+IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) {
+ return v.ir.ConvertUToF(32, 16, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16)));
+}
+
+IR::Value Sample(TranslatorVisitor& v, u64 insn) {
+ const Encoding texs{insn};
+ const IR::U32 handle{v.ir.Imm32(static_cast<u32>(texs.cbuf_offset * 4))};
+ const IR::F32 zero{v.ir.Imm32(0.0f)};
+ const IR::Reg reg_a{texs.src_reg_a};
+ const IR::Reg reg_b{texs.src_reg_b};
+ IR::TextureInstInfo info{};
+ if (texs.precision == Precision::F16) {
+ info.relaxed_precision.Assign(1);
+ }
+ switch (texs.encoding) {
+ case 0: // 1D.LZ
+ info.type.Assign(TextureType::Color1D);
+ return v.ir.ImageSampleExplicitLod(handle, v.F(reg_a), zero, {}, info);
+ case 1: // 2D
+ info.type.Assign(TextureType::Color2D);
+ return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_b), {}, {}, {}, info);
+ case 2: // 2D.LZ
+ info.type.Assign(TextureType::Color2D);
+ return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_b), zero, {}, info);
+ case 3: // 2D.LL
+ CheckAlignment(reg_a, 2);
+ info.type.Assign(TextureType::Color2D);
+ return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), {},
+ info);
+ case 4: // 2D.DC
+ CheckAlignment(reg_a, 2);
+ info.type.Assign(TextureType::Color2D);
+ info.is_depth.Assign(1);
+ return v.ir.ImageSampleDrefImplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b),
+ {}, {}, {}, info);
+ case 5: // 2D.LL.DC
+ CheckAlignment(reg_a, 2);
+ CheckAlignment(reg_b, 2);
+ info.type.Assign(TextureType::Color2D);
+ info.is_depth.Assign(1);
+ return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1),
+ v.F(reg_b + 1), v.F(reg_b), {}, info);
+ case 6: // 2D.LZ.DC
+ CheckAlignment(reg_a, 2);
+ info.type.Assign(TextureType::Color2D);
+ info.is_depth.Assign(1);
+ return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b),
+ zero, {}, info);
+ case 7: // ARRAY_2D
+ CheckAlignment(reg_a, 2);
+ info.type.Assign(TextureType::ColorArray2D);
+ return v.ir.ImageSampleImplicitLod(
+ handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
+ {}, {}, {}, info);
+ case 8: // ARRAY_2D.LZ
+ CheckAlignment(reg_a, 2);
+ info.type.Assign(TextureType::ColorArray2D);
+ return v.ir.ImageSampleExplicitLod(
+ handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
+ zero, {}, info);
+ case 9: // ARRAY_2D.LZ.DC
+ CheckAlignment(reg_a, 2);
+ CheckAlignment(reg_b, 2);
+ info.type.Assign(TextureType::ColorArray2D);
+ info.is_depth.Assign(1);
+ return v.ir.ImageSampleDrefExplicitLod(
+ handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
+ v.F(reg_b + 1), zero, {}, info);
+ case 10: // 3D
+ CheckAlignment(reg_a, 2);
+ info.type.Assign(TextureType::Color3D);
+ return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {},
+ {}, info);
+ case 11: // 3D.LZ
+ CheckAlignment(reg_a, 2);
+ info.type.Assign(TextureType::Color3D);
+ return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), zero, {},
+ info);
+ case 12: // CUBE
+ CheckAlignment(reg_a, 2);
+ info.type.Assign(TextureType::ColorCube);
+ return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {},
+ {}, info);
+ case 13: // CUBE.LL
+ CheckAlignment(reg_a, 2);
+ CheckAlignment(reg_b, 2);
+ info.type.Assign(TextureType::ColorCube);
+ return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b),
+ v.F(reg_b + 1), {}, info);
+ default:
+ throw NotImplementedException("Illegal encoding {}", texs.encoding.Value());
+ }
+}
+
+unsigned Swizzle(u64 insn) {
+ const Encoding texs{insn};
+ const size_t encoding{texs.swizzle};
+ if (texs.dest_reg_b == IR::Reg::RZ) {
+ if (encoding >= RG_LUT.size()) {
+ throw NotImplementedException("Illegal RG encoding {}", encoding);
+ }
+ return RG_LUT[encoding];
+ } else {
+ if (encoding >= RGBA_LUT.size()) {
+ throw NotImplementedException("Illegal RGBA encoding {}", encoding);
+ }
+ return RGBA_LUT[encoding];
+ }
+}
+
+IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
+ const bool is_shadow{sample.Type() == IR::Type::F32};
+ if (is_shadow) {
+ const bool is_alpha{component == 3};
+ return is_alpha ? v.ir.Imm32(1.0f) : IR::F32{sample};
+ } else {
+ return IR::F32{v.ir.CompositeExtract(sample, component)};
+ }
+}
+
+IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
+ const Encoding texs{insn};
+ switch (index) {
+ case 0:
+ return texs.dest_reg_a;
+ case 1:
+ CheckAlignment(texs.dest_reg_a, 2);
+ return texs.dest_reg_a + 1;
+ case 2:
+ return texs.dest_reg_b;
+ case 3:
+ CheckAlignment(texs.dest_reg_b, 2);
+ return texs.dest_reg_b + 1;
+ }
+ throw LogicError("Invalid store index {}", index);
+}
+
+void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
+ const unsigned swizzle{Swizzle(insn)};
+ unsigned store_index{0};
+ for (unsigned component = 0; component < 4; ++component) {
+ if (((swizzle >> component) & 1) == 0) {
+ continue;
+ }
+ const IR::Reg dest{RegStoreComponent32(insn, store_index)};
+ v.F(dest, Extract(v, sample, component));
+ ++store_index;
+ }
+}
+
+IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
+ return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs));
+}
+
+void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
+ const unsigned swizzle{Swizzle(insn)};
+ unsigned store_index{0};
+ std::array<IR::F32, 4> swizzled;
+ for (unsigned component = 0; component < 4; ++component) {
+ if (((swizzle >> component) & 1) == 0) {
+ continue;
+ }
+ swizzled[store_index] = Extract(v, sample, component);
+ ++store_index;
+ }
+ const IR::F32 zero{v.ir.Imm32(0.0f)};
+ const Encoding texs{insn};
+ switch (store_index) {
+ case 1:
+ v.X(texs.dest_reg_a, Pack(v, swizzled[0], zero));
+ break;
+ case 2:
+ case 3:
+ case 4:
+ v.X(texs.dest_reg_a, Pack(v, swizzled[0], swizzled[1]));
+ switch (store_index) {
+ case 2:
+ break;
+ case 3:
+ v.X(texs.dest_reg_b, Pack(v, swizzled[2], zero));
+ break;
+ case 4:
+ v.X(texs.dest_reg_b, Pack(v, swizzled[2], swizzled[3]));
+ break;
+ }
+ break;
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TEXS(u64 insn) {
+ const IR::Value sample{Sample(*this, insn)};
+ if (Encoding{insn}.precision == Precision::F32) {
+ Store32(*this, insn, sample);
+ } else {
+ Store16(*this, insn, sample);
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp
new file mode 100644
index 000000000..218cbc1a8
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp
@@ -0,0 +1,208 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+enum class TextureType : u64 {
+ _1D,
+ ARRAY_1D,
+ _2D,
+ ARRAY_2D,
+ _3D,
+ ARRAY_3D,
+ CUBE,
+ ARRAY_CUBE,
+};
+
+enum class OffsetType : u64 {
+ None = 0,
+ AOFFI,
+ PTP,
+ Invalid,
+};
+
+enum class ComponentType : u64 {
+ R = 0,
+ G = 1,
+ B = 2,
+ A = 3,
+};
+
+Shader::TextureType GetType(TextureType type) {
+ switch (type) {
+ case TextureType::_1D:
+ return Shader::TextureType::Color1D;
+ case TextureType::ARRAY_1D:
+ return Shader::TextureType::ColorArray1D;
+ case TextureType::_2D:
+ return Shader::TextureType::Color2D;
+ case TextureType::ARRAY_2D:
+ return Shader::TextureType::ColorArray2D;
+ case TextureType::_3D:
+ return Shader::TextureType::Color3D;
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return Shader::TextureType::ColorCube;
+ case TextureType::ARRAY_CUBE:
+ return Shader::TextureType::ColorArrayCube;
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
+ const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }};
+ switch (type) {
+ case TextureType::_1D:
+ return v.F(reg);
+ case TextureType::ARRAY_1D:
+ return v.ir.CompositeConstruct(v.F(reg + 1), read_array());
+ case TextureType::_2D:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
+ case TextureType::ARRAY_2D:
+ return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array());
+ case TextureType::_3D:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
+ case TextureType::ARRAY_CUBE:
+ return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array());
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
+ const IR::U32 value{v.X(reg++)};
+ switch (type) {
+ case TextureType::_1D:
+ case TextureType::ARRAY_1D:
+ return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true);
+ case TextureType::_2D:
+ case TextureType::ARRAY_2D:
+ return v.ir.CompositeConstruct(
+ v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true));
+ case TextureType::_3D:
+ case TextureType::ARRAY_3D:
+ return v.ir.CompositeConstruct(
+ v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(16), v.ir.Imm32(6), true));
+ case TextureType::CUBE:
+ case TextureType::ARRAY_CUBE:
+ throw NotImplementedException("Illegal offset on CUBE sample");
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
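+// PTP offsets span two registers, each packing four signed 6-bit fields (one per byte); both
+// packed vectors are forwarded to the gather operation.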
+std::pair<IR::Value, IR::Value> MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) {
+ const IR::U32 value1{v.X(reg++)};
+ const IR::U32 value2{v.X(reg++)};
+ const IR::U32 bitsize{v.ir.Imm32(6)};
+ const auto make_vector{[&v, &bitsize](const IR::U32& value) {
+ return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), bitsize, true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(8), bitsize, true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(16), bitsize, true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(24), bitsize, true));
+ }};
+ return {make_vector(value1), make_vector(value2)};
+}
+
+void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type,
+ bool is_bindless) {
+ union {
+ u64 raw;
+ BitField<35, 1, u64> ndv;
+ BitField<49, 1, u64> nodep;
+ BitField<50, 1, u64> dc;
+ BitField<51, 3, IR::Pred> sparse_pred;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<20, 8, IR::Reg> meta_reg;
+ BitField<28, 3, TextureType> type;
+ BitField<31, 4, u64> mask;
+ BitField<36, 13, u64> cbuf_offset;
+ } const tld4{insn};
+
+ const IR::Value coords{MakeCoords(v, tld4.coord_reg, tld4.type)};
+
+ IR::Reg meta_reg{tld4.meta_reg};
+ IR::Value handle;
+ IR::Value offset;
+ IR::Value offset2;
+ IR::F32 dref;
+ if (!is_bindless) {
+ handle = v.ir.Imm32(static_cast<u32>(tld4.cbuf_offset.Value() * 4));
+ } else {
+ handle = v.X(meta_reg++);
+ }
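+    // The meta register supplies, in this order: the bindless handle (TLD4.B only), the AOFFI or
+    // PTP offsets and the depth compare reference.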
+ switch (offset_type) {
+ case OffsetType::None:
+ break;
+ case OffsetType::AOFFI:
+ offset = MakeOffset(v, meta_reg, tld4.type);
+ break;
+ case OffsetType::PTP:
+ std::tie(offset, offset2) = MakeOffsetPTP(v, meta_reg);
+ break;
+ default:
+ throw NotImplementedException("Invalid offset type {}", offset_type);
+ }
+ if (tld4.dc != 0) {
+ dref = v.F(meta_reg++);
+ }
+ IR::TextureInstInfo info{};
+ info.type.Assign(GetType(tld4.type));
+ info.is_depth.Assign(tld4.dc != 0 ? 1 : 0);
+ info.gather_component.Assign(static_cast<u32>(component_type));
+ const IR::Value sample{[&] {
+ if (tld4.dc == 0) {
+ return v.ir.ImageGather(handle, coords, offset, offset2, info);
+ }
+ return v.ir.ImageGatherDref(handle, coords, offset, offset2, dref, info);
+ }()};
+
+ IR::Reg dest_reg{tld4.dest_reg};
+ for (size_t element = 0; element < 4; ++element) {
+ if (((tld4.mask >> element) & 1) == 0) {
+ continue;
+ }
+ v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)});
+ ++dest_reg;
+ }
+ if (tld4.sparse_pred != IR::Pred::PT) {
+ v.ir.SetPred(tld4.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TLD4(u64 insn) {
+ union {
+ u64 raw;
+ BitField<56, 2, ComponentType> component;
+ BitField<54, 2, OffsetType> offset;
+ } const tld4{insn};
+ Impl(*this, insn, tld4.component, tld4.offset, false);
+}
+
+void TranslatorVisitor::TLD4_b(u64 insn) {
+ union {
+ u64 raw;
+ BitField<38, 2, ComponentType> component;
+ BitField<36, 2, OffsetType> offset;
+ } const tld4{insn};
+ Impl(*this, insn, tld4.component, tld4.offset, true);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp
new file mode 100644
index 000000000..34efa2d50
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp
@@ -0,0 +1,134 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <utility>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Precision : u64 {
+ F32,
+ F16,
+};
+
+enum class ComponentType : u64 {
+ R = 0,
+ G = 1,
+ B = 2,
+ A = 3,
+};
+
+union Encoding {
+ u64 raw;
+ BitField<55, 1, Precision> precision;
+ BitField<52, 2, ComponentType> component_type;
+ BitField<51, 1, u64> aoffi;
+ BitField<50, 1, u64> dc;
+ BitField<49, 1, u64> nodep;
+ BitField<28, 8, IR::Reg> dest_reg_b;
+ BitField<0, 8, IR::Reg> dest_reg_a;
+ BitField<8, 8, IR::Reg> src_reg_a;
+ BitField<20, 8, IR::Reg> src_reg_b;
+ BitField<36, 13, u64> cbuf_offset;
+};
+
+void CheckAlignment(IR::Reg reg, size_t alignment) {
+ if (!IR::IsAligned(reg, alignment)) {
+ throw NotImplementedException("Unaligned source register {}", reg);
+ }
+}
+
+IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) {
+ const IR::U32 value{v.X(reg)};
+ return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true));
+}
+
+IR::Value Sample(TranslatorVisitor& v, u64 insn) {
+ const Encoding tld4s{insn};
+ const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tld4s.cbuf_offset * 4))};
+ const IR::Reg reg_a{tld4s.src_reg_a};
+ const IR::Reg reg_b{tld4s.src_reg_b};
+ IR::TextureInstInfo info{};
+ if (tld4s.precision == Precision::F16) {
+ info.relaxed_precision.Assign(1);
+ }
+ info.gather_component.Assign(static_cast<u32>(tld4s.component_type.Value()));
+ info.type.Assign(Shader::TextureType::Color2D);
+ info.is_depth.Assign(tld4s.dc != 0 ? 1 : 0);
+ IR::Value coords;
+ if (tld4s.aoffi != 0) {
+ CheckAlignment(reg_a, 2);
+ coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1));
+ IR::Value offset = MakeOffset(v, reg_b);
+ if (tld4s.dc != 0) {
+ CheckAlignment(reg_b, 2);
+ IR::F32 dref = v.F(reg_b + 1);
+ return v.ir.ImageGatherDref(handle, coords, offset, {}, dref, info);
+ }
+ return v.ir.ImageGather(handle, coords, offset, {}, info);
+ }
+ if (tld4s.dc != 0) {
+ CheckAlignment(reg_a, 2);
+ coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1));
+ IR::F32 dref = v.F(reg_b);
+ return v.ir.ImageGatherDref(handle, coords, {}, {}, dref, info);
+ }
+ coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_b));
+ return v.ir.ImageGather(handle, coords, {}, {}, info);
+}
+
+IR::Reg RegStoreComponent32(u64 insn, size_t index) {
+    const Encoding tld4s{insn};
+    switch (index) {
+    case 0:
+        return tld4s.dest_reg_a;
+    case 1:
+        CheckAlignment(tld4s.dest_reg_a, 2);
+        return tld4s.dest_reg_a + 1;
+    case 2:
+        return tld4s.dest_reg_b;
+    case 3:
+        CheckAlignment(tld4s.dest_reg_b, 2);
+        return tld4s.dest_reg_b + 1;
+ }
+ throw LogicError("Invalid store index {}", index);
+}
+
+void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
+ for (size_t component = 0; component < 4; ++component) {
+ const IR::Reg dest{RegStoreComponent32(insn, component)};
+ v.F(dest, IR::F32{v.ir.CompositeExtract(sample, component)});
+ }
+}
+
+IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
+ return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs));
+}
+
+void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
+ std::array<IR::F32, 4> swizzled;
+ for (size_t component = 0; component < 4; ++component) {
+ swizzled[component] = IR::F32{v.ir.CompositeExtract(sample, component)};
+ }
+ const Encoding tld4s{insn};
+ v.X(tld4s.dest_reg_a, Pack(v, swizzled[0], swizzled[1]));
+ v.X(tld4s.dest_reg_b, Pack(v, swizzled[2], swizzled[3]));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TLD4S(u64 insn) {
+ const IR::Value sample{Sample(*this, insn)};
+ if (Encoding{insn}.precision == Precision::F32) {
+ Store32(*this, insn, sample);
+ } else {
+ Store16(*this, insn, sample);
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp
new file mode 100644
index 000000000..c3fe3ffda
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp
@@ -0,0 +1,182 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+enum class TextureType : u64 {
+ _1D,
+ ARRAY_1D,
+ _2D,
+ ARRAY_2D,
+ _3D,
+ ARRAY_3D,
+ CUBE,
+ ARRAY_CUBE,
+};
+
+Shader::TextureType GetType(TextureType type) {
+ switch (type) {
+ case TextureType::_1D:
+ return Shader::TextureType::Color1D;
+ case TextureType::ARRAY_1D:
+ return Shader::TextureType::ColorArray1D;
+ case TextureType::_2D:
+ return Shader::TextureType::Color2D;
+ case TextureType::ARRAY_2D:
+ return Shader::TextureType::ColorArray2D;
+ case TextureType::_3D:
+ return Shader::TextureType::Color3D;
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return Shader::TextureType::ColorCube;
+ case TextureType::ARRAY_CUBE:
+ return Shader::TextureType::ColorArrayCube;
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg, bool has_lod_clamp) {
+ const IR::U32 value{v.X(reg)};
+ const u32 base{has_lod_clamp ? 12U : 16U};
+ return v.ir.CompositeConstruct(
+ v.ir.BitFieldExtract(value, v.ir.Imm32(base), v.ir.Imm32(4), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(base + 4), v.ir.Imm32(4), true));
+}
+
+void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
+ union {
+ u64 raw;
+ BitField<49, 1, u64> nodep;
+ BitField<35, 1, u64> aoffi;
+ BitField<50, 1, u64> lc;
+ BitField<51, 3, IR::Pred> sparse_pred;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<20, 8, IR::Reg> derivate_reg;
+ BitField<28, 3, TextureType> type;
+ BitField<31, 4, u64> mask;
+ BitField<36, 13, u64> cbuf_offset;
+ } const txd{insn};
+
+    const bool has_lod_clamp{txd.lc != 0};
+ if (has_lod_clamp) {
+ throw NotImplementedException("TXD.LC - CLAMP is not implemented");
+ }
+
+ IR::Value coords;
+ u32 num_derivates{};
+ IR::Reg base_reg{txd.coord_reg};
+ IR::Reg last_reg;
+ IR::Value handle;
+ if (is_bindless) {
+ handle = v.X(base_reg++);
+ } else {
+ handle = v.ir.Imm32(static_cast<u32>(txd.cbuf_offset.Value() * 4));
+ }
+
+ const auto read_array{[&]() -> IR::F32 {
+ const IR::U32 base{v.ir.Imm32(0)};
+ const IR::U32 count{v.ir.Imm32(has_lod_clamp ? 12 : 16)};
+ const IR::U32 array_index{v.ir.BitFieldExtract(v.X(last_reg), base, count)};
+ return v.ir.ConvertUToF(32, 16, array_index);
+ }};
+ switch (txd.type) {
+ case TextureType::_1D: {
+ coords = v.F(base_reg);
+ num_derivates = 1;
+ last_reg = base_reg + 1;
+ break;
+ }
+ case TextureType::ARRAY_1D: {
+ last_reg = base_reg + 1;
+ coords = v.ir.CompositeConstruct(v.F(base_reg), read_array());
+ num_derivates = 1;
+ break;
+ }
+ case TextureType::_2D: {
+ last_reg = base_reg + 2;
+ coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1));
+ num_derivates = 2;
+ break;
+ }
+ case TextureType::ARRAY_2D: {
+ last_reg = base_reg + 2;
+ coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1), read_array());
+ num_derivates = 2;
+ break;
+ }
+ default:
+ throw NotImplementedException("Invalid texture type");
+ }
+
+ const IR::Reg derivate_reg{txd.derivate_reg};
+ IR::Value derivates;
+ switch (num_derivates) {
+ case 1: {
+ derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1));
+ break;
+ }
+ case 2: {
+ derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1),
+ v.F(derivate_reg + 2), v.F(derivate_reg + 3));
+ break;
+ }
+ default:
+        throw NotImplementedException("Invalid number of derivates");
+ }
+
+ IR::Value offset;
+ if (txd.aoffi != 0) {
+ offset = MakeOffset(v, last_reg, has_lod_clamp);
+ }
+
+ IR::F32 lod_clamp;
+ if (has_lod_clamp) {
+        // The LOD clamp is a 4.8 fixed-point value that has to be converted to float.
+        // To convert a fixed-point value: float(value) / float(1 << fractional_bits),
+        // where the number of fractional bits is 8 in this case.
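+        // For example, a raw field value of 0x180 represents 0x180 / 256.0 = 1.5.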
+        const IR::F32 conv4_8fixp_f{v.ir.Imm32(1.0f / static_cast<f32>(1U << 8))};
+ const IR::F32 fixp_lc{v.ir.ConvertUToF(
+ 32, 16, v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(20), v.ir.Imm32(12)))};
+ lod_clamp = v.ir.FPMul(fixp_lc, conv4_8fixp_f);
+ }
+
+ IR::TextureInstInfo info{};
+ info.type.Assign(GetType(txd.type));
+ info.num_derivates.Assign(num_derivates);
+ info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0);
+ const IR::Value sample{v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info)};
+
+ IR::Reg dest_reg{txd.dest_reg};
+ for (size_t element = 0; element < 4; ++element) {
+ if (((txd.mask >> element) & 1) == 0) {
+ continue;
+ }
+ v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)});
+ ++dest_reg;
+ }
+ if (txd.sparse_pred != IR::Pred::PT) {
+ v.ir.SetPred(txd.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TXD(u64 insn) {
+ Impl(*this, insn, false);
+}
+
+void TranslatorVisitor::TXD_b(u64 insn) {
+ Impl(*this, insn, true);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp
new file mode 100644
index 000000000..983058303
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp
@@ -0,0 +1,165 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+enum class TextureType : u64 {
+ _1D,
+ ARRAY_1D,
+ _2D,
+ ARRAY_2D,
+ _3D,
+ ARRAY_3D,
+ CUBE,
+ ARRAY_CUBE,
+};
+
+Shader::TextureType GetType(TextureType type) {
+ switch (type) {
+ case TextureType::_1D:
+ return Shader::TextureType::Color1D;
+ case TextureType::ARRAY_1D:
+ return Shader::TextureType::ColorArray1D;
+ case TextureType::_2D:
+ return Shader::TextureType::Color2D;
+ case TextureType::ARRAY_2D:
+ return Shader::TextureType::ColorArray2D;
+ case TextureType::_3D:
+ return Shader::TextureType::Color3D;
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return Shader::TextureType::ColorCube;
+ case TextureType::ARRAY_CUBE:
+ return Shader::TextureType::ColorArrayCube;
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
+ const auto read_array{
+ [&]() -> IR::U32 { return v.ir.BitFieldExtract(v.X(reg), v.ir.Imm32(0), v.ir.Imm32(16)); }};
+ switch (type) {
+ case TextureType::_1D:
+ return v.X(reg);
+ case TextureType::ARRAY_1D:
+ return v.ir.CompositeConstruct(v.X(reg + 1), read_array());
+ case TextureType::_2D:
+ return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1));
+ case TextureType::ARRAY_2D:
+ return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), read_array());
+ case TextureType::_3D:
+ return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
+ case TextureType::ARRAY_CUBE:
+ return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), v.X(reg + 3), read_array());
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
+ const IR::U32 value{v.X(reg++)};
+ switch (type) {
+ case TextureType::_1D:
+ case TextureType::ARRAY_1D:
+ return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true);
+ case TextureType::_2D:
+ case TextureType::ARRAY_2D:
+ return v.ir.CompositeConstruct(
+ v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true));
+ case TextureType::_3D:
+ case TextureType::ARRAY_3D:
+ return v.ir.CompositeConstruct(
+ v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true));
+ case TextureType::CUBE:
+ case TextureType::ARRAY_CUBE:
+ throw NotImplementedException("Illegal offset on CUBE sample");
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
+ union {
+ u64 raw;
+ BitField<49, 1, u64> nodep;
+ BitField<55, 1, u64> lod;
+ BitField<50, 1, u64> multisample;
+ BitField<35, 1, u64> aoffi;
+ BitField<54, 1, u64> clamp;
+ BitField<51, 3, IR::Pred> sparse_pred;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<20, 8, IR::Reg> meta_reg;
+ BitField<28, 3, TextureType> type;
+ BitField<31, 4, u64> mask;
+ BitField<36, 13, u64> cbuf_offset;
+ } const tld{insn};
+
+ const IR::Value coords{MakeCoords(v, tld.coord_reg, tld.type)};
+
+ IR::Reg meta_reg{tld.meta_reg};
+ IR::Value handle;
+ IR::Value offset;
+ IR::U32 lod;
+ IR::U32 multisample;
+ if (is_bindless) {
+ handle = v.X(meta_reg++);
+ } else {
+ handle = v.ir.Imm32(static_cast<u32>(tld.cbuf_offset.Value() * 4));
+ }
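+    // The meta register supplies, in this order: the bindless handle (TLD.B only), the explicit
+    // LOD, the AOFFI offsets and the multisample index.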
+ if (tld.lod != 0) {
+ lod = v.X(meta_reg++);
+ } else {
+ lod = v.ir.Imm32(0U);
+ }
+ if (tld.aoffi != 0) {
+ offset = MakeOffset(v, meta_reg, tld.type);
+ }
+ if (tld.multisample != 0) {
+ multisample = v.X(meta_reg++);
+ }
+ if (tld.clamp != 0) {
+        throw NotImplementedException("TLD.CL - CLAMP is not implemented");
+ }
+ IR::TextureInstInfo info{};
+ info.type.Assign(GetType(tld.type));
+ const IR::Value sample{v.ir.ImageFetch(handle, coords, offset, lod, multisample, info)};
+
+ IR::Reg dest_reg{tld.dest_reg};
+ for (size_t element = 0; element < 4; ++element) {
+ if (((tld.mask >> element) & 1) == 0) {
+ continue;
+ }
+ v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)});
+ ++dest_reg;
+ }
+ if (tld.sparse_pred != IR::Pred::PT) {
+ v.ir.SetPred(tld.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TLD(u64 insn) {
+ Impl(*this, insn, false);
+}
+
+void TranslatorVisitor::TLD_b(u64 insn) {
+ Impl(*this, insn, true);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp
new file mode 100644
index 000000000..5dd7e31b2
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp
@@ -0,0 +1,242 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Precision : u64 {
+ F16,
+ F32,
+};
+
+constexpr unsigned R = 1;
+constexpr unsigned G = 2;
+constexpr unsigned B = 4;
+constexpr unsigned A = 8;
+
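+// Destination component masks selected by the swizzle field; RG_LUT applies when dest_reg_b is
+// RZ, RGBA_LUT otherwise. See Swizzle() below.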
+constexpr std::array RG_LUT{
+ R, //
+ G, //
+ B, //
+ A, //
+ R | G, //
+ R | A, //
+ G | A, //
+ B | A, //
+};
+
+constexpr std::array RGBA_LUT{
+ R | G | B, //
+ R | G | A, //
+ R | B | A, //
+ G | B | A, //
+ R | G | B | A, //
+};
+
+union Encoding {
+ u64 raw;
+ BitField<59, 1, Precision> precision;
+ BitField<54, 1, u64> aoffi;
+ BitField<53, 1, u64> lod;
+ BitField<55, 1, u64> ms;
+ BitField<49, 1, u64> nodep;
+ BitField<28, 8, IR::Reg> dest_reg_b;
+ BitField<0, 8, IR::Reg> dest_reg_a;
+ BitField<8, 8, IR::Reg> src_reg_a;
+ BitField<20, 8, IR::Reg> src_reg_b;
+ BitField<36, 13, u64> cbuf_offset;
+ BitField<50, 3, u64> swizzle;
+ BitField<53, 4, u64> encoding;
+};
+
+void CheckAlignment(IR::Reg reg, size_t alignment) {
+ if (!IR::IsAligned(reg, alignment)) {
+ throw NotImplementedException("Unaligned source register {}", reg);
+ }
+}
+
+IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) {
+ const IR::U32 value{v.X(reg)};
+ return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true));
+}
+
+IR::Value Sample(TranslatorVisitor& v, u64 insn) {
+ const Encoding tlds{insn};
+ const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tlds.cbuf_offset * 4))};
+ const IR::Reg reg_a{tlds.src_reg_a};
+ const IR::Reg reg_b{tlds.src_reg_b};
+ IR::Value coords;
+ IR::U32 lod{v.ir.Imm32(0U)};
+ IR::Value offsets;
+ IR::U32 multisample;
+ Shader::TextureType texture_type{};
+ switch (tlds.encoding) {
+ case 0:
+ texture_type = Shader::TextureType::Color1D;
+ coords = v.X(reg_a);
+ break;
+ case 1:
+ texture_type = Shader::TextureType::Color1D;
+ coords = v.X(reg_a);
+ lod = v.X(reg_b);
+ break;
+ case 2:
+ texture_type = Shader::TextureType::Color2D;
+ coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_b));
+ break;
+ case 4:
+ CheckAlignment(reg_a, 2);
+ texture_type = Shader::TextureType::Color2D;
+ coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
+ offsets = MakeOffset(v, reg_b);
+ break;
+ case 5:
+ CheckAlignment(reg_a, 2);
+ texture_type = Shader::TextureType::Color2D;
+ coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
+ lod = v.X(reg_b);
+ break;
+ case 6:
+ CheckAlignment(reg_a, 2);
+ texture_type = Shader::TextureType::Color2D;
+ coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
+ multisample = v.X(reg_b);
+ break;
+ case 7:
+ CheckAlignment(reg_a, 2);
+ texture_type = Shader::TextureType::Color3D;
+ coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1), v.X(reg_b));
+ break;
+ case 8: {
+ CheckAlignment(reg_b, 2);
+ const IR::U32 array{v.ir.BitFieldExtract(v.X(reg_a), v.ir.Imm32(0), v.ir.Imm32(16))};
+ texture_type = Shader::TextureType::ColorArray2D;
+ coords = v.ir.CompositeConstruct(v.X(reg_b), v.X(reg_b + 1), array);
+ break;
+ }
+ case 12:
+ CheckAlignment(reg_a, 2);
+ CheckAlignment(reg_b, 2);
+ texture_type = Shader::TextureType::Color2D;
+ coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
+ lod = v.X(reg_b);
+ offsets = MakeOffset(v, reg_b + 1);
+ break;
+ default:
+ throw NotImplementedException("Illegal encoding {}", tlds.encoding.Value());
+ }
+ IR::TextureInstInfo info{};
+ if (tlds.precision == Precision::F16) {
+ info.relaxed_precision.Assign(1);
+ }
+ info.type.Assign(texture_type);
+ return v.ir.ImageFetch(handle, coords, offsets, lod, multisample, info);
+}
+
+unsigned Swizzle(u64 insn) {
+ const Encoding tlds{insn};
+ const size_t encoding{tlds.swizzle};
+ if (tlds.dest_reg_b == IR::Reg::RZ) {
+ if (encoding >= RG_LUT.size()) {
+ throw NotImplementedException("Illegal RG encoding {}", encoding);
+ }
+ return RG_LUT[encoding];
+ } else {
+ if (encoding >= RGBA_LUT.size()) {
+ throw NotImplementedException("Illegal RGBA encoding {}", encoding);
+ }
+ return RGBA_LUT[encoding];
+ }
+}
+
+IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
+ return IR::F32{v.ir.CompositeExtract(sample, component)};
+}
+
+IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
+ const Encoding tlds{insn};
+ switch (index) {
+ case 0:
+ return tlds.dest_reg_a;
+ case 1:
+ CheckAlignment(tlds.dest_reg_a, 2);
+ return tlds.dest_reg_a + 1;
+ case 2:
+ return tlds.dest_reg_b;
+ case 3:
+ CheckAlignment(tlds.dest_reg_b, 2);
+ return tlds.dest_reg_b + 1;
+ }
+ throw LogicError("Invalid store index {}", index);
+}
+
+void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
+ const unsigned swizzle{Swizzle(insn)};
+ unsigned store_index{0};
+ for (unsigned component = 0; component < 4; ++component) {
+ if (((swizzle >> component) & 1) == 0) {
+ continue;
+ }
+ const IR::Reg dest{RegStoreComponent32(insn, store_index)};
+ v.F(dest, Extract(v, sample, component));
+ ++store_index;
+ }
+}
+
+IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
+ return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs));
+}
+
+void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
+ const unsigned swizzle{Swizzle(insn)};
+ unsigned store_index{0};
+ std::array<IR::F32, 4> swizzled;
+ for (unsigned component = 0; component < 4; ++component) {
+ if (((swizzle >> component) & 1) == 0) {
+ continue;
+ }
+ swizzled[store_index] = Extract(v, sample, component);
+ ++store_index;
+ }
+ const IR::F32 zero{v.ir.Imm32(0.0f)};
+ const Encoding tlds{insn};
+ switch (store_index) {
+ case 1:
+ v.X(tlds.dest_reg_a, Pack(v, swizzled[0], zero));
+ break;
+ case 2:
+ case 3:
+ case 4:
+ v.X(tlds.dest_reg_a, Pack(v, swizzled[0], swizzled[1]));
+ switch (store_index) {
+ case 2:
+ break;
+ case 3:
+ v.X(tlds.dest_reg_b, Pack(v, swizzled[2], zero));
+ break;
+ case 4:
+ v.X(tlds.dest_reg_b, Pack(v, swizzled[2], swizzled[3]));
+ break;
+ }
+ break;
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TLDS(u64 insn) {
+ const IR::Value sample{Sample(*this, insn)};
+ if (Encoding{insn}.precision == Precision::F32) {
+ Store32(*this, insn, sample);
+ } else {
+ Store16(*this, insn, sample);
+ }
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp
new file mode 100644
index 000000000..aea3c0e62
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp
@@ -0,0 +1,131 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+enum class TextureType : u64 {
+ _1D,
+ ARRAY_1D,
+ _2D,
+ ARRAY_2D,
+ _3D,
+ ARRAY_3D,
+ CUBE,
+ ARRAY_CUBE,
+};
+
+Shader::TextureType GetType(TextureType type) {
+ switch (type) {
+ case TextureType::_1D:
+ return Shader::TextureType::Color1D;
+ case TextureType::ARRAY_1D:
+ return Shader::TextureType::ColorArray1D;
+ case TextureType::_2D:
+ return Shader::TextureType::Color2D;
+ case TextureType::ARRAY_2D:
+ return Shader::TextureType::ColorArray2D;
+ case TextureType::_3D:
+ return Shader::TextureType::Color3D;
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return Shader::TextureType::ColorCube;
+ case TextureType::ARRAY_CUBE:
+ return Shader::TextureType::ColorArrayCube;
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
+    // The ISA reads an array component here, but it is not needed in high-level shading
+    // languages, so that information is dropped.
+ switch (type) {
+ case TextureType::_1D:
+ return v.F(reg);
+ case TextureType::ARRAY_1D:
+ return v.F(reg + 1);
+ case TextureType::_2D:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
+ case TextureType::ARRAY_2D:
+ return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2));
+ case TextureType::_3D:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
+ case TextureType::ARRAY_CUBE:
+ return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3));
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
+ union {
+ u64 raw;
+ BitField<49, 1, u64> nodep;
+ BitField<35, 1, u64> ndv;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<20, 8, IR::Reg> meta_reg;
+ BitField<28, 3, TextureType> type;
+ BitField<31, 4, u64> mask;
+ BitField<36, 13, u64> cbuf_offset;
+ } const tmml{insn};
+
+ if ((tmml.mask & 0b1100) != 0) {
+        throw NotImplementedException("TMML BA results are not implemented");
+ }
+ const IR::Value coords{MakeCoords(v, tmml.coord_reg, tmml.type)};
+
+ IR::U32 handle;
+ IR::Reg meta_reg{tmml.meta_reg};
+ if (is_bindless) {
+ handle = v.X(meta_reg++);
+ } else {
+ handle = v.ir.Imm32(static_cast<u32>(tmml.cbuf_offset.Value() * 4));
+ }
+ IR::TextureInstInfo info{};
+ info.type.Assign(GetType(tmml.type));
+ const IR::Value sample{v.ir.ImageQueryLod(handle, coords, info)};
+
+ IR::Reg dest_reg{tmml.dest_reg};
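+    // The first two enabled elements are converted to integers and shifted left by 8 bits; the
+    // remaining elements are written back as raw floats.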
+ for (size_t element = 0; element < 4; ++element) {
+ if (((tmml.mask >> element) & 1) == 0) {
+ continue;
+ }
+ IR::F32 value{v.ir.CompositeExtract(sample, element)};
+ if (element < 2) {
+ IR::U32 casted_value;
+ if (element == 0) {
+ casted_value = v.ir.ConvertFToU(32, value);
+ } else {
+ casted_value = v.ir.ConvertFToS(16, value);
+ }
+ v.X(dest_reg, v.ir.ShiftLeftLogical(casted_value, v.ir.Imm32(8)));
+ } else {
+ v.F(dest_reg, value);
+ }
+ ++dest_reg;
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TMML(u64 insn) {
+ Impl(*this, insn, false);
+}
+
+void TranslatorVisitor::TMML_b(u64 insn) {
+ Impl(*this, insn, true);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp
new file mode 100644
index 000000000..0459e5473
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp
@@ -0,0 +1,76 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Mode : u64 {
+ Dimension = 1,
+ TextureType = 2,
+ SamplePos = 5,
+};
+
+IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg) {
+ switch (mode) {
+ case Mode::Dimension: {
+ const IR::U32 lod{v.X(src_reg)};
+ return v.ir.ImageQueryDimension(handle, lod);
+ }
+ case Mode::TextureType:
+ case Mode::SamplePos:
+ default:
+ throw NotImplementedException("Mode {}", mode);
+ }
+}
+
+void Impl(TranslatorVisitor& v, u64 insn, std::optional<u32> cbuf_offset) {
+ union {
+ u64 raw;
+ BitField<49, 1, u64> nodep;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<22, 3, Mode> mode;
+ BitField<31, 4, u64> mask;
+ } const txq{insn};
+
+ IR::Reg src_reg{txq.src_reg};
+ IR::U32 handle;
+ if (cbuf_offset) {
+ handle = v.ir.Imm32(*cbuf_offset);
+ } else {
+ handle = v.X(src_reg);
+ ++src_reg;
+ }
+ const IR::Value query{Query(v, handle, txq.mode, src_reg)};
+ IR::Reg dest_reg{txq.dest_reg};
+ for (int element = 0; element < 4; ++element) {
+ if (((txq.mask >> element) & 1) == 0) {
+ continue;
+ }
+ v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, static_cast<size_t>(element))});
+ ++dest_reg;
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TXQ(u64 insn) {
+ union {
+ u64 raw;
+ BitField<36, 13, u64> cbuf_offset;
+ } const txq{insn};
+
+ Impl(*this, insn, static_cast<u32>(txq.cbuf_offset * 4));
+}
+
+void TranslatorVisitor::TXQ_b(u64 insn) {
+ Impl(*this, insn, std::nullopt);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp
new file mode 100644
index 000000000..e1f4174cf
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp
@@ -0,0 +1,30 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
+
+namespace Shader::Maxwell {
+
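+// Extracts the operand lane chosen by 'selector': 8-bit lanes for Byte width, 16-bit lanes for
+// Short width and the whole register for Word. Unknown widths are treated as Byte.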
+IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value, VideoWidth width,
+ u32 selector, bool is_signed) {
+ switch (width) {
+ case VideoWidth::Byte:
+ case VideoWidth::Unknown:
+ return ir.BitFieldExtract(value, ir.Imm32(selector * 8), ir.Imm32(8), is_signed);
+ case VideoWidth::Short:
+ return ir.BitFieldExtract(value, ir.Imm32(selector * 16), ir.Imm32(16), is_signed);
+ case VideoWidth::Word:
+ return value;
+ default:
+ throw NotImplementedException("Unknown VideoWidth {}", width);
+ }
+}
+
+VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate) {
+    // Immediates must be in 16-bit format.
+ return is_immediate ? VideoWidth::Short : width;
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h
new file mode 100644
index 000000000..40c0b907c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h
@@ -0,0 +1,23 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+enum class VideoWidth : u64 {
+ Byte,
+ Unknown,
+ Short,
+ Word,
+};
+
+[[nodiscard]] IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value,
+ VideoWidth width, u32 selector, bool is_signed);
+
+[[nodiscard]] VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate);
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp
new file mode 100644
index 000000000..78869601f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp
@@ -0,0 +1,92 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class VideoMinMaxOps : u64 {
+ MRG_16H,
+ MRG_16L,
+ MRG_8B0,
+ MRG_8B2,
+ ACC,
+ MIN,
+ MAX,
+};
+
+[[nodiscard]] IR::U32 ApplyVideoMinMaxOp(IR::IREmitter& ir, const IR::U32& lhs, const IR::U32& rhs,
+ VideoMinMaxOps op, bool is_signed) {
+ switch (op) {
+ case VideoMinMaxOps::MIN:
+ return ir.IMin(lhs, rhs, is_signed);
+ case VideoMinMaxOps::MAX:
+ return ir.IMax(lhs, rhs, is_signed);
+ default:
+ throw NotImplementedException("VMNMX op {}", op);
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::VMNMX(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<20, 16, u64> src_b_imm;
+ BitField<28, 2, u64> src_b_selector;
+ BitField<29, 2, VideoWidth> src_b_width;
+ BitField<36, 2, u64> src_a_selector;
+ BitField<37, 2, VideoWidth> src_a_width;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> src_a_sign;
+ BitField<49, 1, u64> src_b_sign;
+ BitField<50, 1, u64> is_src_b_reg;
+ BitField<51, 3, VideoMinMaxOps> op;
+ BitField<54, 1, u64> dest_sign;
+ BitField<55, 1, u64> sat;
+ BitField<56, 1, u64> mx;
+ } const vmnmx{insn};
+
+ if (vmnmx.cc != 0) {
+ throw NotImplementedException("VMNMX CC");
+ }
+ if (vmnmx.sat != 0) {
+ throw NotImplementedException("VMNMX SAT");
+ }
+ // Selectors were shown to default to 2 in unit tests
+ if (vmnmx.src_a_selector != 2) {
+ throw NotImplementedException("VMNMX Selector {}", vmnmx.src_a_selector.Value());
+ }
+ if (vmnmx.src_b_selector != 2) {
+ throw NotImplementedException("VMNMX Selector {}", vmnmx.src_b_selector.Value());
+ }
+ if (vmnmx.src_a_width != VideoWidth::Word) {
+ throw NotImplementedException("VMNMX Source Width {}", vmnmx.src_a_width.Value());
+ }
+
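+    // VMNMX first takes the minimum or maximum of operands a and b (selected by .mx) and then
+    // combines that result with operand c using the secondary VideoMinMaxOps operation.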
+ const bool is_b_imm{vmnmx.is_src_b_reg == 0};
+ const IR::U32 src_a{GetReg8(insn)};
+ const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmnmx.src_b_imm)) : GetReg20(insn)};
+ const IR::U32 src_c{GetReg39(insn)};
+
+ const VideoWidth a_width{vmnmx.src_a_width};
+ const VideoWidth b_width{GetVideoSourceWidth(vmnmx.src_b_width, is_b_imm)};
+
+ const bool src_a_signed{vmnmx.src_a_sign != 0};
+ const bool src_b_signed{vmnmx.src_b_sign != 0};
+ const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, 0, src_a_signed)};
+ const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, 0, src_b_signed)};
+
+ // First operation's sign is only dependent on operand b's sign
+ const bool op_1_signed{src_b_signed};
+
+ const IR::U32 lhs{vmnmx.mx != 0 ? ir.IMax(op_a, op_b, op_1_signed)
+ : ir.IMin(op_a, op_b, op_1_signed)};
+ X(vmnmx.dest_reg, ApplyVideoMinMaxOp(ir, lhs, src_c, vmnmx.op, vmnmx.dest_sign != 0));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp
new file mode 100644
index 000000000..cc2e6d6e6
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp
@@ -0,0 +1,64 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
+
+namespace Shader::Maxwell {
+void TranslatorVisitor::VMAD(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<20, 16, u64> src_b_imm;
+ BitField<28, 2, u64> src_b_selector;
+ BitField<29, 2, VideoWidth> src_b_width;
+ BitField<36, 2, u64> src_a_selector;
+ BitField<37, 2, VideoWidth> src_a_width;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> src_a_sign;
+ BitField<49, 1, u64> src_b_sign;
+ BitField<50, 1, u64> is_src_b_reg;
+ BitField<51, 2, u64> scale;
+ BitField<53, 1, u64> src_c_neg;
+ BitField<54, 1, u64> src_a_neg;
+ BitField<55, 1, u64> sat;
+ } const vmad{insn};
+
+ if (vmad.cc != 0) {
+ throw NotImplementedException("VMAD CC");
+ }
+ if (vmad.sat != 0) {
+ throw NotImplementedException("VMAD SAT");
+ }
+ if (vmad.scale != 0) {
+ throw NotImplementedException("VMAD SCALE");
+ }
+ if (vmad.src_a_neg != 0 && vmad.src_c_neg != 0) {
+ throw NotImplementedException("VMAD PO");
+ }
+ if (vmad.src_a_neg != 0 || vmad.src_c_neg != 0) {
+ throw NotImplementedException("VMAD NEG");
+ }
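+    // VMAD computes dest = a * b + c on the extracted video operands.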
+ const bool is_b_imm{vmad.is_src_b_reg == 0};
+ const IR::U32 src_a{GetReg8(insn)};
+ const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmad.src_b_imm)) : GetReg20(insn)};
+ const IR::U32 src_c{GetReg39(insn)};
+
+ const u32 a_selector{static_cast<u32>(vmad.src_a_selector)};
+ // Immediate values can't have a selector
+ const u32 b_selector{is_b_imm ? 0U : static_cast<u32>(vmad.src_b_selector)};
+ const VideoWidth a_width{vmad.src_a_width};
+ const VideoWidth b_width{GetVideoSourceWidth(vmad.src_b_width, is_b_imm)};
+
+ const bool src_a_signed{vmad.src_a_sign != 0};
+ const bool src_b_signed{vmad.src_b_sign != 0};
+ const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)};
+ const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)};
+
+ X(vmad.dest_reg, ir.IAdd(ir.IMul(op_a, op_b), src_c));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp
new file mode 100644
index 000000000..1b66abc33
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp
@@ -0,0 +1,92 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class VsetpCompareOp : u64 {
+ False = 0,
+ LessThan,
+ Equal,
+ LessThanEqual,
+ GreaterThan = 16,
+ NotEqual,
+ GreaterThanEqual,
+ True,
+};
+
+CompareOp VsetpToShaderCompareOp(VsetpCompareOp op) {
+ switch (op) {
+ case VsetpCompareOp::False:
+ return CompareOp::False;
+ case VsetpCompareOp::LessThan:
+ return CompareOp::LessThan;
+ case VsetpCompareOp::Equal:
+ return CompareOp::Equal;
+ case VsetpCompareOp::LessThanEqual:
+ return CompareOp::LessThanEqual;
+ case VsetpCompareOp::GreaterThan:
+ return CompareOp::GreaterThan;
+ case VsetpCompareOp::NotEqual:
+ return CompareOp::NotEqual;
+ case VsetpCompareOp::GreaterThanEqual:
+ return CompareOp::GreaterThanEqual;
+ case VsetpCompareOp::True:
+ return CompareOp::True;
+ default:
+ throw NotImplementedException("Invalid compare op {}", op);
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::VSETP(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 3, IR::Pred> dest_pred_b;
+ BitField<3, 3, IR::Pred> dest_pred_a;
+ BitField<20, 16, u64> src_b_imm;
+ BitField<28, 2, u64> src_b_selector;
+ BitField<29, 2, VideoWidth> src_b_width;
+ BitField<36, 2, u64> src_a_selector;
+ BitField<37, 2, VideoWidth> src_a_width;
+ BitField<39, 3, IR::Pred> bop_pred;
+ BitField<42, 1, u64> neg_bop_pred;
+ BitField<43, 5, VsetpCompareOp> compare_op;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<48, 1, u64> src_a_sign;
+ BitField<49, 1, u64> src_b_sign;
+ BitField<50, 1, u64> is_src_b_reg;
+ } const vsetp{insn};
+
+ const bool is_b_imm{vsetp.is_src_b_reg == 0};
+ const IR::U32 src_a{GetReg8(insn)};
+ const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vsetp.src_b_imm)) : GetReg20(insn)};
+
+ const u32 a_selector{static_cast<u32>(vsetp.src_a_selector)};
+ // Immediate values can't have a selector
+ const u32 b_selector{is_b_imm ? 0U : static_cast<u32>(vsetp.src_b_selector)};
+ const VideoWidth a_width{vsetp.src_a_width};
+ const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)};
+
+ const bool src_a_signed{vsetp.src_a_sign != 0};
+ const bool src_b_signed{vsetp.src_b_sign != 0};
+ const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)};
+ const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)};
+
+ // Compare operation's sign is only dependent on operand b's sign
+ const bool compare_signed{src_b_signed};
+ const CompareOp compare_op{VsetpToShaderCompareOp(vsetp.compare_op)};
+ const IR::U1 comparison{IntegerCompare(ir, op_a, op_b, compare_op, compare_signed)};
+ const IR::U1 bop_pred{ir.GetPred(vsetp.bop_pred, vsetp.neg_bop_pred != 0)};
+ const IR::U1 result_a{PredicateCombine(ir, comparison, bop_pred, vsetp.bop)};
+ const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(comparison), bop_pred, vsetp.bop)};
+ ir.SetPred(vsetp.dest_pred_a, result_a);
+ ir.SetPred(vsetp.dest_pred_b, result_b);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp
new file mode 100644
index 000000000..7ce370f09
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp
@@ -0,0 +1,54 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class VoteOp : u64 {
+ ALL,
+ ANY,
+ EQ,
+};
+
+[[nodiscard]] IR::U1 VoteOperation(IR::IREmitter& ir, const IR::U1& pred, VoteOp vote_op) {
+ switch (vote_op) {
+ case VoteOp::ALL:
+ return ir.VoteAll(pred);
+ case VoteOp::ANY:
+ return ir.VoteAny(pred);
+ case VoteOp::EQ:
+ return ir.VoteEqual(pred);
+ default:
+ throw NotImplementedException("Invalid VOTE op {}", vote_op);
+ }
+}
+
+void Vote(TranslatorVisitor& v, u64 insn) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<39, 3, IR::Pred> pred_a;
+ BitField<42, 1, u64> neg_pred_a;
+ BitField<45, 3, IR::Pred> pred_b;
+ BitField<48, 2, VoteOp> vote_op;
+ } const vote{insn};
+
+ const IR::U1 vote_pred{v.ir.GetPred(vote.pred_a, vote.neg_pred_a != 0)};
+ v.ir.SetPred(vote.pred_b, VoteOperation(v.ir, vote_pred, vote.vote_op));
+ v.X(vote.dest_reg, v.ir.SubgroupBallot(vote_pred));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::VOTE(u64 insn) {
+ Vote(*this, insn);
+}
+
+void TranslatorVisitor::VOTE_vtg(u64) {
+ LOG_WARNING(Shader, "(STUBBED) called");
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp
new file mode 100644
index 000000000..550fed55c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp
@@ -0,0 +1,69 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class ShuffleMode : u64 {
+ IDX,
+ UP,
+ DOWN,
+ BFLY,
+};
+
+[[nodiscard]] IR::U32 ShuffleOperation(IR::IREmitter& ir, const IR::U32& value,
+ const IR::U32& index, const IR::U32& mask,
+ ShuffleMode shfl_op) {
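+ // The SHFL mask operand packs the in-bounds clamp value in bits [0, 5) and the segment mask
+ // in bits [8, 13); split it here before dispatching to the IR shuffle operations.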
+ const IR::U32 clamp{ir.BitFieldExtract(mask, ir.Imm32(0), ir.Imm32(5))};
+ const IR::U32 seg_mask{ir.BitFieldExtract(mask, ir.Imm32(8), ir.Imm32(5))};
+ switch (shfl_op) {
+ case ShuffleMode::IDX:
+ return ir.ShuffleIndex(value, index, clamp, seg_mask);
+ case ShuffleMode::UP:
+ return ir.ShuffleUp(value, index, clamp, seg_mask);
+ case ShuffleMode::DOWN:
+ return ir.ShuffleDown(value, index, clamp, seg_mask);
+ case ShuffleMode::BFLY:
+ return ir.ShuffleButterfly(value, index, clamp, seg_mask);
+ default:
+ throw NotImplementedException("Invalid SHFL op {}", shfl_op);
+ }
+}
+
+void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32& mask) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<30, 2, ShuffleMode> mode;
+ BitField<48, 3, IR::Pred> pred;
+ } const shfl{insn};
+
+ const IR::U32 result{ShuffleOperation(v.ir, v.X(shfl.src_reg), index, mask, shfl.mode)};
+ v.ir.SetPred(shfl.pred, v.ir.GetInBoundsFromOp(result));
+ v.X(shfl.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::SHFL(u64 insn) {
+ union {
+ u64 insn;
+ BitField<20, 5, u64> src_a_imm;
+ BitField<28, 1, u64> src_a_flag;
+ BitField<29, 1, u64> src_b_flag;
+ BitField<34, 13, u64> src_b_imm;
+ } const flags{insn};
+ const IR::U32 src_a{flags.src_a_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_a_imm))
+ : GetReg20(insn)};
+ const IR::U32 src_b{flags.src_b_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_b_imm))
+ : GetReg39(insn)};
+ Shuffle(*this, insn, src_a, src_b);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp
new file mode 100644
index 000000000..8e3c4c5d5
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp
@@ -0,0 +1,52 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/maxwell/decode.h"
+#include "shader_recompiler/frontend/maxwell/location.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+#include "shader_recompiler/frontend/maxwell/translate/translate.h"
+
+namespace Shader::Maxwell {
+
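+// Call the translator method with whichever of (pc, insn), (insn), or () its signature accepts.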
+template <auto method>
+static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) {
+ using MethodType = decltype(method);
+ if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, Location, u64>) {
+ (visitor.*method)(pc, insn);
+ } else if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, u64>) {
+ (visitor.*method)(insn);
+ } else {
+ (visitor.*method)();
+ }
+}
+
+void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end) {
+ if (location_begin == location_end) {
+ return;
+ }
+ TranslatorVisitor visitor{env, *block};
+ for (Location pc = location_begin; pc != location_end; ++pc) {
+ const u64 insn{env.ReadInstruction(pc.Offset())};
+ try {
+ const Opcode opcode{Decode(insn)};
+ switch (opcode) {
+#define INST(name, cute, mask) \
+ case Opcode::name: \
+ Invoke<&TranslatorVisitor::name>(visitor, pc, insn); \
+ break;
+#include "shader_recompiler/frontend/maxwell/maxwell.inc"
+#undef INST
+ default:
+ throw LogicError("Invalid opcode {}", opcode);
+ }
+ } catch (Exception& exception) {
+ exception.Prepend(fmt::format("Translate {}: ", Decode(insn)));
+ throw;
+ }
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.h b/src/shader_recompiler/frontend/maxwell/translate/translate.h
new file mode 100644
index 000000000..a3edd2e46
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/translate.h
@@ -0,0 +1,14 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+
+namespace Shader::Maxwell {
+
+void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end);
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
new file mode 100644
index 000000000..c067d459c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -0,0 +1,223 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <memory>
+#include <vector>
+
+#include "common/settings.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/post_order.h"
+#include "shader_recompiler/frontend/maxwell/structured_control_flow.h"
+#include "shader_recompiler/frontend/maxwell/translate/translate.h"
+#include "shader_recompiler/frontend/maxwell/translate_program.h"
+#include "shader_recompiler/host_translate_info.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Maxwell {
+namespace {
+IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
+ size_t num_syntax_blocks{};
+ for (const auto& node : syntax_list) {
+ if (node.type == IR::AbstractSyntaxNode::Type::Block) {
+ ++num_syntax_blocks;
+ }
+ }
+ IR::BlockList blocks;
+ blocks.reserve(num_syntax_blocks);
+ for (const auto& node : syntax_list) {
+ if (node.type == IR::AbstractSyntaxNode::Type::Block) {
+ blocks.push_back(node.data.block);
+ }
+ }
+ return blocks;
+}
+
+void RemoveUnreachableBlocks(IR::Program& program) {
+ // Some blocks might be unreachable if a function call exists unconditionally.
+ // If this happens, the number of blocks and post order blocks will mismatch.
+ if (program.blocks.size() == program.post_order_blocks.size()) {
+ return;
+ }
+ const auto begin{program.blocks.begin() + 1};
+ const auto end{program.blocks.end()};
+ const auto pred{[](IR::Block* block) { return block->ImmPredecessors().empty(); }};
+ program.blocks.erase(std::remove_if(begin, end, pred), end);
+}
+
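+// Derive the interpolation qualifier of each generic input from the fragment shader's SPH input
+// map; differing per-component qualifiers on the same attribute are not supported.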
+void CollectInterpolationInfo(Environment& env, IR::Program& program) {
+ if (program.stage != Stage::Fragment) {
+ return;
+ }
+ const ProgramHeader& sph{env.SPH()};
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ std::optional<PixelImap> imap;
+ for (const PixelImap value : sph.ps.GenericInputMap(static_cast<u32>(index))) {
+ if (value == PixelImap::Unused) {
+ continue;
+ }
+ if (imap && imap != value) {
+ throw NotImplementedException("Per component interpolation");
+ }
+ imap = value;
+ }
+ if (!imap) {
+ continue;
+ }
+ program.info.interpolation[index] = [&] {
+ switch (*imap) {
+ case PixelImap::Unused:
+ case PixelImap::Perspective:
+ return Interpolation::Smooth;
+ case PixelImap::Constant:
+ return Interpolation::Flat;
+ case PixelImap::ScreenLinear:
+ return Interpolation::NoPerspective;
+ }
+ throw NotImplementedException("Unknown interpolation {}", *imap);
+ }();
+ }
+}
+
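+// Register storage buffer descriptors for the driver-reserved NVN slots (16 descriptors of 0x10
+// bytes each in constant buffer 0) that the shader was detected to use.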
+void AddNVNStorageBuffers(IR::Program& program) {
+ if (!program.info.uses_global_memory) {
+ return;
+ }
+ const u32 driver_cbuf{0};
+ const u32 descriptor_size{0x10};
+ const u32 num_buffers{16};
+ const u32 base{[&] {
+ switch (program.stage) {
+ case Stage::VertexA:
+ case Stage::VertexB:
+ return 0x110u;
+ case Stage::TessellationControl:
+ return 0x210u;
+ case Stage::TessellationEval:
+ return 0x310u;
+ case Stage::Geometry:
+ return 0x410u;
+ case Stage::Fragment:
+ return 0x510u;
+ case Stage::Compute:
+ return 0x310u;
+ }
+ throw InvalidArgument("Invalid stage {}", program.stage);
+ }()};
+ auto& descs{program.info.storage_buffers_descriptors};
+ for (u32 index = 0; index < num_buffers; ++index) {
+ if (!program.info.nvn_buffer_used[index]) {
+ continue;
+ }
+ const u32 offset{base + index * descriptor_size};
+ const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)};
+ if (it != descs.end()) {
+ it->is_written |= program.info.stores_global_memory;
+ continue;
+ }
+ descs.push_back({
+ .cbuf_index = driver_cbuf,
+ .cbuf_offset = offset,
+ .count = 1,
+ .is_written = program.info.stores_global_memory,
+ });
+ }
+}
+} // Anonymous namespace
+
+IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
+ Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) {
+ IR::Program program;
+ program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg);
+ program.blocks = GenerateBlocks(program.syntax_list);
+ program.post_order_blocks = PostOrder(program.syntax_list.front());
+ program.stage = env.ShaderStage();
+ program.local_memory_size = env.LocalMemorySize();
+ switch (program.stage) {
+ case Stage::TessellationControl: {
+ const ProgramHeader& sph{env.SPH()};
+ program.invocations = sph.common2.threads_per_input_primitive;
+ break;
+ }
+ case Stage::Geometry: {
+ const ProgramHeader& sph{env.SPH()};
+ program.output_topology = sph.common3.output_topology;
+ program.output_vertices = sph.common4.max_output_vertices;
+ program.invocations = sph.common2.threads_per_input_primitive;
+ program.is_geometry_passthrough = sph.common0.geometry_passthrough != 0;
+ if (program.is_geometry_passthrough) {
+ const auto& mask{env.GpPassthroughMask()};
+ for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) {
+ program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0;
+ }
+ }
+ break;
+ }
+ case Stage::Compute:
+ program.workgroup_size = env.WorkgroupSize();
+ program.shared_memory_size = env.SharedMemorySize();
+ break;
+ default:
+ break;
+ }
+ RemoveUnreachableBlocks(program);
+
+ // Replace instructions before the SSA rewrite
+ if (!host_info.support_float16) {
+ Optimization::LowerFp16ToFp32(program);
+ }
+ if (!host_info.support_int64) {
+ Optimization::LowerInt64ToInt32(program);
+ }
+ Optimization::SsaRewritePass(program);
+
+ Optimization::GlobalMemoryToStorageBufferPass(program);
+ Optimization::TexturePass(env, program);
+
+ Optimization::ConstantPropagationPass(program);
+ Optimization::DeadCodeEliminationPass(program);
+ if (Settings::values.renderer_debug) {
+ Optimization::VerificationPass(program);
+ }
+ Optimization::CollectShaderInfoPass(env, program);
+ CollectInterpolationInfo(env, program);
+ AddNVNStorageBuffers(program);
+ return program;
+}
+
+IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b,
+ Environment& env_vertex_b) {
+ IR::Program result{};
+ Optimization::VertexATransformPass(vertex_a);
+ Optimization::VertexBTransformPass(vertex_b);
+ for (const auto& term : vertex_a.syntax_list) {
+ if (term.type != IR::AbstractSyntaxNode::Type::Return) {
+ result.syntax_list.push_back(term);
+ }
+ }
+ result.syntax_list.insert(result.syntax_list.end(), vertex_b.syntax_list.begin(),
+ vertex_b.syntax_list.end());
+ result.blocks = GenerateBlocks(result.syntax_list);
+ result.post_order_blocks = vertex_b.post_order_blocks;
+ for (const auto& block : vertex_a.post_order_blocks) {
+ result.post_order_blocks.push_back(block);
+ }
+ result.stage = Stage::VertexB;
+ result.info = vertex_a.info;
+ result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size);
+ result.info.loads.mask |= vertex_b.info.loads.mask;
+ result.info.stores.mask |= vertex_b.info.stores.mask;
+
+ Optimization::JoinTextureInfo(result.info, vertex_b.info);
+ Optimization::JoinStorageInfo(result.info, vertex_b.info);
+ Optimization::DeadCodeEliminationPass(result);
+ if (Settings::values.renderer_debug) {
+ Optimization::VerificationPass(result);
+ }
+ Optimization::CollectShaderInfoPass(env_vertex_b, result);
+ return result;
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.h b/src/shader_recompiler/frontend/maxwell/translate_program.h
new file mode 100644
index 000000000..a84814811
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.h
@@ -0,0 +1,23 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/host_translate_info.h"
+#include "shader_recompiler/object_pool.h"
+
+namespace Shader::Maxwell {
+
+[[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
+ ObjectPool<IR::Block>& block_pool, Environment& env,
+ Flow::CFG& cfg, const HostTranslateInfo& host_info);
+
+[[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b,
+ Environment& env_vertex_b);
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h
new file mode 100644
index 000000000..94a584219
--- /dev/null
+++ b/src/shader_recompiler/host_translate_info.h
@@ -0,0 +1,18 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+namespace Shader {
+
+// Try to keep entries here to a minimum.
+// Adding entries can accidentally change the information cached for a shader.
+
+/// Misc information about the host
+struct HostTranslateInfo {
+ bool support_float16{}; ///< True when the device supports 16-bit floats
+ bool support_int64{}; ///< True when the device supports 64-bit integers
+};
+
+} // namespace Shader
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
new file mode 100644
index 000000000..5ead930f1
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -0,0 +1,928 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/alignment.h"
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/ir_opt/passes.h"
+#include "shader_recompiler/shader_info.h"
+
+namespace Shader::Optimization {
+namespace {
+void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) {
+ if (count != 1) {
+ throw NotImplementedException("Constant buffer descriptor indexing");
+ }
+ if ((info.constant_buffer_mask & (1U << index)) != 0) {
+ return;
+ }
+ info.constant_buffer_mask |= 1U << index;
+
+ auto& cbufs{info.constant_buffer_descriptors};
+ cbufs.insert(std::ranges::lower_bound(cbufs, index, {}, &ConstantBufferDescriptor::index),
+ ConstantBufferDescriptor{
+ .index = index,
+ .count = 1,
+ });
+}
+
+void GetPatch(Info& info, IR::Patch patch) {
+ if (!IR::IsGeneric(patch)) {
+ throw NotImplementedException("Reading non-generic patch {}", patch);
+ }
+ info.uses_patches.at(IR::GenericPatchIndex(patch)) = true;
+}
+
+void SetPatch(Info& info, IR::Patch patch) {
+ if (IR::IsGeneric(patch)) {
+ info.uses_patches.at(IR::GenericPatchIndex(patch)) = true;
+ return;
+ }
+ switch (patch) {
+ case IR::Patch::TessellationLodLeft:
+ case IR::Patch::TessellationLodTop:
+ case IR::Patch::TessellationLodRight:
+ case IR::Patch::TessellationLodBottom:
+ info.stores_tess_level_outer = true;
+ break;
+ case IR::Patch::TessellationLodInteriorU:
+ case IR::Patch::TessellationLodInteriorV:
+ info.stores_tess_level_inner = true;
+ break;
+ default:
+ throw NotImplementedException("Set patch {}", patch);
+ }
+}
+
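+// Track which NVN storage buffer descriptor slots a constant buffer read may touch; reads with a
+// non-immediate index or offset conservatively mark every slot as used.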
+void CheckCBufNVN(Info& info, IR::Inst& inst) {
+ const IR::Value cbuf_index{inst.Arg(0)};
+ if (!cbuf_index.IsImmediate()) {
+ info.nvn_buffer_used.set();
+ return;
+ }
+ const u32 index{cbuf_index.U32()};
+ if (index != 0) {
+ return;
+ }
+ const IR::Value cbuf_offset{inst.Arg(1)};
+ if (!cbuf_offset.IsImmediate()) {
+ info.nvn_buffer_used.set();
+ return;
+ }
+ const u32 offset{cbuf_offset.U32()};
+ const u32 descriptor_size{0x10};
+ const u32 upper_limit{info.nvn_buffer_base + descriptor_size * 16};
+ if (offset >= info.nvn_buffer_base && offset < upper_limit) {
+ const std::size_t nvn_index{(offset - info.nvn_buffer_base) / descriptor_size};
+ info.nvn_buffer_used.set(nvn_index, true);
+ }
+}
+
+void VisitUsages(Info& info, IR::Inst& inst) {
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::CompositeConstructF16x2:
+ case IR::Opcode::CompositeConstructF16x3:
+ case IR::Opcode::CompositeConstructF16x4:
+ case IR::Opcode::CompositeExtractF16x2:
+ case IR::Opcode::CompositeExtractF16x3:
+ case IR::Opcode::CompositeExtractF16x4:
+ case IR::Opcode::CompositeInsertF16x2:
+ case IR::Opcode::CompositeInsertF16x3:
+ case IR::Opcode::CompositeInsertF16x4:
+ case IR::Opcode::SelectF16:
+ case IR::Opcode::BitCastU16F16:
+ case IR::Opcode::BitCastF16U16:
+ case IR::Opcode::PackFloat2x16:
+ case IR::Opcode::UnpackFloat2x16:
+ case IR::Opcode::ConvertS16F16:
+ case IR::Opcode::ConvertS32F16:
+ case IR::Opcode::ConvertS64F16:
+ case IR::Opcode::ConvertU16F16:
+ case IR::Opcode::ConvertU32F16:
+ case IR::Opcode::ConvertU64F16:
+ case IR::Opcode::ConvertF16S8:
+ case IR::Opcode::ConvertF16S16:
+ case IR::Opcode::ConvertF16S32:
+ case IR::Opcode::ConvertF16S64:
+ case IR::Opcode::ConvertF16U8:
+ case IR::Opcode::ConvertF16U16:
+ case IR::Opcode::ConvertF16U32:
+ case IR::Opcode::ConvertF16U64:
+ case IR::Opcode::FPAbs16:
+ case IR::Opcode::FPAdd16:
+ case IR::Opcode::FPCeil16:
+ case IR::Opcode::FPFloor16:
+ case IR::Opcode::FPFma16:
+ case IR::Opcode::FPMul16:
+ case IR::Opcode::FPNeg16:
+ case IR::Opcode::FPRoundEven16:
+ case IR::Opcode::FPSaturate16:
+ case IR::Opcode::FPClamp16:
+ case IR::Opcode::FPTrunc16:
+ case IR::Opcode::FPOrdEqual16:
+ case IR::Opcode::FPUnordEqual16:
+ case IR::Opcode::FPOrdNotEqual16:
+ case IR::Opcode::FPUnordNotEqual16:
+ case IR::Opcode::FPOrdLessThan16:
+ case IR::Opcode::FPUnordLessThan16:
+ case IR::Opcode::FPOrdGreaterThan16:
+ case IR::Opcode::FPUnordGreaterThan16:
+ case IR::Opcode::FPOrdLessThanEqual16:
+ case IR::Opcode::FPUnordLessThanEqual16:
+ case IR::Opcode::FPOrdGreaterThanEqual16:
+ case IR::Opcode::FPUnordGreaterThanEqual16:
+ case IR::Opcode::FPIsNan16:
+ case IR::Opcode::GlobalAtomicAddF16x2:
+ case IR::Opcode::GlobalAtomicMinF16x2:
+ case IR::Opcode::GlobalAtomicMaxF16x2:
+ case IR::Opcode::StorageAtomicAddF16x2:
+ case IR::Opcode::StorageAtomicMinF16x2:
+ case IR::Opcode::StorageAtomicMaxF16x2:
+ info.uses_fp16 = true;
+ break;
+ case IR::Opcode::CompositeConstructF64x2:
+ case IR::Opcode::CompositeConstructF64x3:
+ case IR::Opcode::CompositeConstructF64x4:
+ case IR::Opcode::CompositeExtractF64x2:
+ case IR::Opcode::CompositeExtractF64x3:
+ case IR::Opcode::CompositeExtractF64x4:
+ case IR::Opcode::CompositeInsertF64x2:
+ case IR::Opcode::CompositeInsertF64x3:
+ case IR::Opcode::CompositeInsertF64x4:
+ case IR::Opcode::SelectF64:
+ case IR::Opcode::BitCastU64F64:
+ case IR::Opcode::BitCastF64U64:
+ case IR::Opcode::PackDouble2x32:
+ case IR::Opcode::UnpackDouble2x32:
+ case IR::Opcode::FPAbs64:
+ case IR::Opcode::FPAdd64:
+ case IR::Opcode::FPCeil64:
+ case IR::Opcode::FPFloor64:
+ case IR::Opcode::FPFma64:
+ case IR::Opcode::FPMax64:
+ case IR::Opcode::FPMin64:
+ case IR::Opcode::FPMul64:
+ case IR::Opcode::FPNeg64:
+ case IR::Opcode::FPRecip64:
+ case IR::Opcode::FPRecipSqrt64:
+ case IR::Opcode::FPRoundEven64:
+ case IR::Opcode::FPSaturate64:
+ case IR::Opcode::FPClamp64:
+ case IR::Opcode::FPTrunc64:
+ case IR::Opcode::FPOrdEqual64:
+ case IR::Opcode::FPUnordEqual64:
+ case IR::Opcode::FPOrdNotEqual64:
+ case IR::Opcode::FPUnordNotEqual64:
+ case IR::Opcode::FPOrdLessThan64:
+ case IR::Opcode::FPUnordLessThan64:
+ case IR::Opcode::FPOrdGreaterThan64:
+ case IR::Opcode::FPUnordGreaterThan64:
+ case IR::Opcode::FPOrdLessThanEqual64:
+ case IR::Opcode::FPUnordLessThanEqual64:
+ case IR::Opcode::FPOrdGreaterThanEqual64:
+ case IR::Opcode::FPUnordGreaterThanEqual64:
+ case IR::Opcode::FPIsNan64:
+ case IR::Opcode::ConvertS16F64:
+ case IR::Opcode::ConvertS32F64:
+ case IR::Opcode::ConvertS64F64:
+ case IR::Opcode::ConvertU16F64:
+ case IR::Opcode::ConvertU32F64:
+ case IR::Opcode::ConvertU64F64:
+ case IR::Opcode::ConvertF32F64:
+ case IR::Opcode::ConvertF64F32:
+ case IR::Opcode::ConvertF64S8:
+ case IR::Opcode::ConvertF64S16:
+ case IR::Opcode::ConvertF64S32:
+ case IR::Opcode::ConvertF64S64:
+ case IR::Opcode::ConvertF64U8:
+ case IR::Opcode::ConvertF64U16:
+ case IR::Opcode::ConvertF64U32:
+ case IR::Opcode::ConvertF64U64:
+ info.uses_fp64 = true;
+ break;
+ default:
+ break;
+ }
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::GetCbufU8:
+ case IR::Opcode::GetCbufS8:
+ case IR::Opcode::UndefU8:
+ case IR::Opcode::LoadGlobalU8:
+ case IR::Opcode::LoadGlobalS8:
+ case IR::Opcode::WriteGlobalU8:
+ case IR::Opcode::WriteGlobalS8:
+ case IR::Opcode::LoadStorageU8:
+ case IR::Opcode::LoadStorageS8:
+ case IR::Opcode::WriteStorageU8:
+ case IR::Opcode::WriteStorageS8:
+ case IR::Opcode::LoadSharedU8:
+ case IR::Opcode::LoadSharedS8:
+ case IR::Opcode::WriteSharedU8:
+ case IR::Opcode::SelectU8:
+ case IR::Opcode::ConvertF16S8:
+ case IR::Opcode::ConvertF16U8:
+ case IR::Opcode::ConvertF32S8:
+ case IR::Opcode::ConvertF32U8:
+ case IR::Opcode::ConvertF64S8:
+ case IR::Opcode::ConvertF64U8:
+ info.uses_int8 = true;
+ break;
+ default:
+ break;
+ }
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::GetCbufU16:
+ case IR::Opcode::GetCbufS16:
+ case IR::Opcode::UndefU16:
+ case IR::Opcode::LoadGlobalU16:
+ case IR::Opcode::LoadGlobalS16:
+ case IR::Opcode::WriteGlobalU16:
+ case IR::Opcode::WriteGlobalS16:
+ case IR::Opcode::LoadStorageU16:
+ case IR::Opcode::LoadStorageS16:
+ case IR::Opcode::WriteStorageU16:
+ case IR::Opcode::WriteStorageS16:
+ case IR::Opcode::LoadSharedU16:
+ case IR::Opcode::LoadSharedS16:
+ case IR::Opcode::WriteSharedU16:
+ case IR::Opcode::SelectU16:
+ case IR::Opcode::BitCastU16F16:
+ case IR::Opcode::BitCastF16U16:
+ case IR::Opcode::ConvertS16F16:
+ case IR::Opcode::ConvertS16F32:
+ case IR::Opcode::ConvertS16F64:
+ case IR::Opcode::ConvertU16F16:
+ case IR::Opcode::ConvertU16F32:
+ case IR::Opcode::ConvertU16F64:
+ case IR::Opcode::ConvertF16S16:
+ case IR::Opcode::ConvertF16U16:
+ case IR::Opcode::ConvertF32S16:
+ case IR::Opcode::ConvertF32U16:
+ case IR::Opcode::ConvertF64S16:
+ case IR::Opcode::ConvertF64U16:
+ info.uses_int16 = true;
+ break;
+ default:
+ break;
+ }
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::UndefU64:
+ case IR::Opcode::LoadGlobalU8:
+ case IR::Opcode::LoadGlobalS8:
+ case IR::Opcode::LoadGlobalU16:
+ case IR::Opcode::LoadGlobalS16:
+ case IR::Opcode::LoadGlobal32:
+ case IR::Opcode::LoadGlobal64:
+ case IR::Opcode::LoadGlobal128:
+ case IR::Opcode::WriteGlobalU8:
+ case IR::Opcode::WriteGlobalS8:
+ case IR::Opcode::WriteGlobalU16:
+ case IR::Opcode::WriteGlobalS16:
+ case IR::Opcode::WriteGlobal32:
+ case IR::Opcode::WriteGlobal64:
+ case IR::Opcode::WriteGlobal128:
+ case IR::Opcode::SelectU64:
+ case IR::Opcode::BitCastU64F64:
+ case IR::Opcode::BitCastF64U64:
+ case IR::Opcode::PackUint2x32:
+ case IR::Opcode::UnpackUint2x32:
+ case IR::Opcode::IAdd64:
+ case IR::Opcode::ISub64:
+ case IR::Opcode::INeg64:
+ case IR::Opcode::ShiftLeftLogical64:
+ case IR::Opcode::ShiftRightLogical64:
+ case IR::Opcode::ShiftRightArithmetic64:
+ case IR::Opcode::ConvertS64F16:
+ case IR::Opcode::ConvertS64F32:
+ case IR::Opcode::ConvertS64F64:
+ case IR::Opcode::ConvertU64F16:
+ case IR::Opcode::ConvertU64F32:
+ case IR::Opcode::ConvertU64F64:
+ case IR::Opcode::ConvertU64U32:
+ case IR::Opcode::ConvertU32U64:
+ case IR::Opcode::ConvertF16U64:
+ case IR::Opcode::ConvertF32U64:
+ case IR::Opcode::ConvertF64U64:
+ case IR::Opcode::SharedAtomicExchange64:
+ case IR::Opcode::GlobalAtomicIAdd64:
+ case IR::Opcode::GlobalAtomicSMin64:
+ case IR::Opcode::GlobalAtomicUMin64:
+ case IR::Opcode::GlobalAtomicSMax64:
+ case IR::Opcode::GlobalAtomicUMax64:
+ case IR::Opcode::GlobalAtomicAnd64:
+ case IR::Opcode::GlobalAtomicOr64:
+ case IR::Opcode::GlobalAtomicXor64:
+ case IR::Opcode::GlobalAtomicExchange64:
+ case IR::Opcode::StorageAtomicIAdd64:
+ case IR::Opcode::StorageAtomicSMin64:
+ case IR::Opcode::StorageAtomicUMin64:
+ case IR::Opcode::StorageAtomicSMax64:
+ case IR::Opcode::StorageAtomicUMax64:
+ case IR::Opcode::StorageAtomicAnd64:
+ case IR::Opcode::StorageAtomicOr64:
+ case IR::Opcode::StorageAtomicXor64:
+ case IR::Opcode::StorageAtomicExchange64:
+ info.uses_int64 = true;
+ break;
+ default:
+ break;
+ }
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::WriteGlobalU8:
+ case IR::Opcode::WriteGlobalS8:
+ case IR::Opcode::WriteGlobalU16:
+ case IR::Opcode::WriteGlobalS16:
+ case IR::Opcode::WriteGlobal32:
+ case IR::Opcode::WriteGlobal64:
+ case IR::Opcode::WriteGlobal128:
+ case IR::Opcode::GlobalAtomicIAdd32:
+ case IR::Opcode::GlobalAtomicSMin32:
+ case IR::Opcode::GlobalAtomicUMin32:
+ case IR::Opcode::GlobalAtomicSMax32:
+ case IR::Opcode::GlobalAtomicUMax32:
+ case IR::Opcode::GlobalAtomicInc32:
+ case IR::Opcode::GlobalAtomicDec32:
+ case IR::Opcode::GlobalAtomicAnd32:
+ case IR::Opcode::GlobalAtomicOr32:
+ case IR::Opcode::GlobalAtomicXor32:
+ case IR::Opcode::GlobalAtomicExchange32:
+ case IR::Opcode::GlobalAtomicIAdd64:
+ case IR::Opcode::GlobalAtomicSMin64:
+ case IR::Opcode::GlobalAtomicUMin64:
+ case IR::Opcode::GlobalAtomicSMax64:
+ case IR::Opcode::GlobalAtomicUMax64:
+ case IR::Opcode::GlobalAtomicAnd64:
+ case IR::Opcode::GlobalAtomicOr64:
+ case IR::Opcode::GlobalAtomicXor64:
+ case IR::Opcode::GlobalAtomicExchange64:
+ case IR::Opcode::GlobalAtomicAddF32:
+ case IR::Opcode::GlobalAtomicAddF16x2:
+ case IR::Opcode::GlobalAtomicAddF32x2:
+ case IR::Opcode::GlobalAtomicMinF16x2:
+ case IR::Opcode::GlobalAtomicMinF32x2:
+ case IR::Opcode::GlobalAtomicMaxF16x2:
+ case IR::Opcode::GlobalAtomicMaxF32x2:
+ info.stores_global_memory = true;
+ [[fallthrough]];
+ case IR::Opcode::LoadGlobalU8:
+ case IR::Opcode::LoadGlobalS8:
+ case IR::Opcode::LoadGlobalU16:
+ case IR::Opcode::LoadGlobalS16:
+ case IR::Opcode::LoadGlobal32:
+ case IR::Opcode::LoadGlobal64:
+ case IR::Opcode::LoadGlobal128:
+ info.uses_int64 = true;
+ info.uses_global_memory = true;
+ info.used_constant_buffer_types |= IR::Type::U32 | IR::Type::U32x2;
+ info.used_storage_buffer_types |= IR::Type::U32 | IR::Type::U32x2 | IR::Type::U32x4;
+ break;
+ default:
+ break;
+ }
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::DemoteToHelperInvocation:
+ info.uses_demote_to_helper_invocation = true;
+ break;
+ case IR::Opcode::GetAttribute:
+ info.loads.mask[static_cast<size_t>(inst.Arg(0).Attribute())] = true;
+ break;
+ case IR::Opcode::SetAttribute:
+ info.stores.mask[static_cast<size_t>(inst.Arg(0).Attribute())] = true;
+ break;
+ case IR::Opcode::GetPatch:
+ GetPatch(info, inst.Arg(0).Patch());
+ break;
+ case IR::Opcode::SetPatch:
+ SetPatch(info, inst.Arg(0).Patch());
+ break;
+ case IR::Opcode::GetAttributeIndexed:
+ info.loads_indexed_attributes = true;
+ break;
+ case IR::Opcode::SetAttributeIndexed:
+ info.stores_indexed_attributes = true;
+ break;
+ case IR::Opcode::SetFragColor:
+ info.stores_frag_color[inst.Arg(0).U32()] = true;
+ break;
+ case IR::Opcode::SetSampleMask:
+ info.stores_sample_mask = true;
+ break;
+ case IR::Opcode::SetFragDepth:
+ info.stores_frag_depth = true;
+ break;
+ case IR::Opcode::WorkgroupId:
+ info.uses_workgroup_id = true;
+ break;
+ case IR::Opcode::LocalInvocationId:
+ info.uses_local_invocation_id = true;
+ break;
+ case IR::Opcode::InvocationId:
+ info.uses_invocation_id = true;
+ break;
+ case IR::Opcode::SampleId:
+ info.uses_sample_id = true;
+ break;
+ case IR::Opcode::IsHelperInvocation:
+ info.uses_is_helper_invocation = true;
+ break;
+ case IR::Opcode::LaneId:
+ info.uses_subgroup_invocation_id = true;
+ break;
+ case IR::Opcode::ShuffleIndex:
+ case IR::Opcode::ShuffleUp:
+ case IR::Opcode::ShuffleDown:
+ case IR::Opcode::ShuffleButterfly:
+ info.uses_subgroup_shuffles = true;
+ break;
+ case IR::Opcode::GetCbufU8:
+ case IR::Opcode::GetCbufS8:
+ case IR::Opcode::GetCbufU16:
+ case IR::Opcode::GetCbufS16:
+ case IR::Opcode::GetCbufU32:
+ case IR::Opcode::GetCbufF32:
+ case IR::Opcode::GetCbufU32x2: {
+ const IR::Value index{inst.Arg(0)};
+ const IR::Value offset{inst.Arg(1)};
+ if (!index.IsImmediate()) {
+ throw NotImplementedException("Constant buffer with non-immediate index");
+ }
+ AddConstantBufferDescriptor(info, index.U32(), 1);
+ u32 element_size{};
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::GetCbufU8:
+ case IR::Opcode::GetCbufS8:
+ info.used_constant_buffer_types |= IR::Type::U8;
+ element_size = 1;
+ break;
+ case IR::Opcode::GetCbufU16:
+ case IR::Opcode::GetCbufS16:
+ info.used_constant_buffer_types |= IR::Type::U16;
+ element_size = 2;
+ break;
+ case IR::Opcode::GetCbufU32:
+ info.used_constant_buffer_types |= IR::Type::U32;
+ element_size = 4;
+ break;
+ case IR::Opcode::GetCbufF32:
+ info.used_constant_buffer_types |= IR::Type::F32;
+ element_size = 4;
+ break;
+ case IR::Opcode::GetCbufU32x2:
+ info.used_constant_buffer_types |= IR::Type::U32x2;
+ element_size = 8;
+ break;
+ default:
+ break;
+ }
+ u32& size{info.constant_buffer_used_sizes[index.U32()]};
+ if (offset.IsImmediate()) {
+ size = Common::AlignUp(std::max(size, offset.U32() + element_size), 16u);
+ } else {
+ size = 0x10'000;
+ }
+ break;
+ }
+ case IR::Opcode::BindlessImageSampleImplicitLod:
+ case IR::Opcode::BindlessImageSampleExplicitLod:
+ case IR::Opcode::BindlessImageSampleDrefImplicitLod:
+ case IR::Opcode::BindlessImageSampleDrefExplicitLod:
+ case IR::Opcode::BindlessImageGather:
+ case IR::Opcode::BindlessImageGatherDref:
+ case IR::Opcode::BindlessImageFetch:
+ case IR::Opcode::BindlessImageQueryDimensions:
+ case IR::Opcode::BindlessImageQueryLod:
+ case IR::Opcode::BindlessImageGradient:
+ case IR::Opcode::BoundImageSampleImplicitLod:
+ case IR::Opcode::BoundImageSampleExplicitLod:
+ case IR::Opcode::BoundImageSampleDrefImplicitLod:
+ case IR::Opcode::BoundImageSampleDrefExplicitLod:
+ case IR::Opcode::BoundImageGather:
+ case IR::Opcode::BoundImageGatherDref:
+ case IR::Opcode::BoundImageFetch:
+ case IR::Opcode::BoundImageQueryDimensions:
+ case IR::Opcode::BoundImageQueryLod:
+ case IR::Opcode::BoundImageGradient:
+ case IR::Opcode::ImageGather:
+ case IR::Opcode::ImageGatherDref:
+ case IR::Opcode::ImageFetch:
+ case IR::Opcode::ImageQueryDimensions:
+ case IR::Opcode::ImageGradient: {
+ const TextureType type{inst.Flags<IR::TextureInstInfo>().type};
+ info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D;
+ info.uses_sparse_residency |=
+ inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr;
+ break;
+ }
+ case IR::Opcode::ImageSampleImplicitLod:
+ case IR::Opcode::ImageSampleExplicitLod:
+ case IR::Opcode::ImageSampleDrefImplicitLod:
+ case IR::Opcode::ImageSampleDrefExplicitLod:
+ case IR::Opcode::ImageQueryLod: {
+ const auto flags{inst.Flags<IR::TextureInstInfo>()};
+ const TextureType type{flags.type};
+ info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D;
+ info.uses_shadow_lod |= flags.is_depth != 0;
+ info.uses_sparse_residency |=
+ inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr;
+ break;
+ }
+ case IR::Opcode::ImageRead: {
+ const auto flags{inst.Flags<IR::TextureInstInfo>()};
+ info.uses_typeless_image_reads |= flags.image_format == ImageFormat::Typeless;
+ info.uses_sparse_residency |=
+ inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr;
+ break;
+ }
+ case IR::Opcode::ImageWrite: {
+ const auto flags{inst.Flags<IR::TextureInstInfo>()};
+ info.uses_typeless_image_writes |= flags.image_format == ImageFormat::Typeless;
+ info.uses_image_buffers |= flags.type == TextureType::Buffer;
+ break;
+ }
+ case IR::Opcode::SubgroupEqMask:
+ case IR::Opcode::SubgroupLtMask:
+ case IR::Opcode::SubgroupLeMask:
+ case IR::Opcode::SubgroupGtMask:
+ case IR::Opcode::SubgroupGeMask:
+ info.uses_subgroup_mask = true;
+ break;
+ case IR::Opcode::VoteAll:
+ case IR::Opcode::VoteAny:
+ case IR::Opcode::VoteEqual:
+ case IR::Opcode::SubgroupBallot:
+ info.uses_subgroup_vote = true;
+ break;
+ case IR::Opcode::FSwizzleAdd:
+ info.uses_fswzadd = true;
+ break;
+ case IR::Opcode::DPdxFine:
+ case IR::Opcode::DPdyFine:
+ case IR::Opcode::DPdxCoarse:
+ case IR::Opcode::DPdyCoarse:
+ info.uses_derivatives = true;
+ break;
+ case IR::Opcode::LoadStorageU8:
+ case IR::Opcode::LoadStorageS8:
+ case IR::Opcode::WriteStorageU8:
+ case IR::Opcode::WriteStorageS8:
+ info.used_storage_buffer_types |= IR::Type::U8;
+ break;
+ case IR::Opcode::LoadStorageU16:
+ case IR::Opcode::LoadStorageS16:
+ case IR::Opcode::WriteStorageU16:
+ case IR::Opcode::WriteStorageS16:
+ info.used_storage_buffer_types |= IR::Type::U16;
+ break;
+ case IR::Opcode::LoadStorage32:
+ case IR::Opcode::WriteStorage32:
+ case IR::Opcode::StorageAtomicIAdd32:
+ case IR::Opcode::StorageAtomicUMin32:
+ case IR::Opcode::StorageAtomicUMax32:
+ case IR::Opcode::StorageAtomicAnd32:
+ case IR::Opcode::StorageAtomicOr32:
+ case IR::Opcode::StorageAtomicXor32:
+ case IR::Opcode::StorageAtomicExchange32:
+ info.used_storage_buffer_types |= IR::Type::U32;
+ break;
+ case IR::Opcode::LoadStorage64:
+ case IR::Opcode::WriteStorage64:
+ info.used_storage_buffer_types |= IR::Type::U32x2;
+ break;
+ case IR::Opcode::LoadStorage128:
+ case IR::Opcode::WriteStorage128:
+ info.used_storage_buffer_types |= IR::Type::U32x4;
+ break;
+ case IR::Opcode::SharedAtomicSMin32:
+ info.uses_atomic_s32_min = true;
+ break;
+ case IR::Opcode::SharedAtomicSMax32:
+ info.uses_atomic_s32_max = true;
+ break;
+ case IR::Opcode::SharedAtomicInc32:
+ info.uses_shared_increment = true;
+ break;
+ case IR::Opcode::SharedAtomicDec32:
+ info.uses_shared_decrement = true;
+ break;
+ case IR::Opcode::SharedAtomicExchange64:
+ info.uses_int64_bit_atomics = true;
+ break;
+ case IR::Opcode::GlobalAtomicInc32:
+ case IR::Opcode::StorageAtomicInc32:
+ info.used_storage_buffer_types |= IR::Type::U32;
+ info.uses_global_increment = true;
+ break;
+ case IR::Opcode::GlobalAtomicDec32:
+ case IR::Opcode::StorageAtomicDec32:
+ info.used_storage_buffer_types |= IR::Type::U32;
+ info.uses_global_decrement = true;
+ break;
+ case IR::Opcode::GlobalAtomicAddF32:
+ case IR::Opcode::StorageAtomicAddF32:
+ info.used_storage_buffer_types |= IR::Type::U32;
+ info.uses_atomic_f32_add = true;
+ break;
+ case IR::Opcode::GlobalAtomicAddF16x2:
+ case IR::Opcode::StorageAtomicAddF16x2:
+ info.used_storage_buffer_types |= IR::Type::U32;
+ info.uses_atomic_f16x2_add = true;
+ break;
+ case IR::Opcode::GlobalAtomicAddF32x2:
+ case IR::Opcode::StorageAtomicAddF32x2:
+ info.used_storage_buffer_types |= IR::Type::U32;
+ info.uses_atomic_f32x2_add = true;
+ break;
+ case IR::Opcode::GlobalAtomicMinF16x2:
+ case IR::Opcode::StorageAtomicMinF16x2:
+ info.used_storage_buffer_types |= IR::Type::U32;
+ info.uses_atomic_f16x2_min = true;
+ break;
+ case IR::Opcode::GlobalAtomicMinF32x2:
+ case IR::Opcode::StorageAtomicMinF32x2:
+ info.used_storage_buffer_types |= IR::Type::U32;
+ info.uses_atomic_f32x2_min = true;
+ break;
+ case IR::Opcode::GlobalAtomicMaxF16x2:
+ case IR::Opcode::StorageAtomicMaxF16x2:
+ info.used_storage_buffer_types |= IR::Type::U32;
+ info.uses_atomic_f16x2_max = true;
+ break;
+ case IR::Opcode::GlobalAtomicMaxF32x2:
+ case IR::Opcode::StorageAtomicMaxF32x2:
+ info.used_storage_buffer_types |= IR::Type::U32;
+ info.uses_atomic_f32x2_max = true;
+ break;
+ case IR::Opcode::StorageAtomicSMin32:
+ info.used_storage_buffer_types |= IR::Type::U32;
+ info.uses_atomic_s32_min = true;
+ break;
+ case IR::Opcode::StorageAtomicSMax32:
+ info.used_storage_buffer_types |= IR::Type::U32;
+ info.uses_atomic_s32_max = true;
+ break;
+ case IR::Opcode::GlobalAtomicIAdd64:
+ case IR::Opcode::GlobalAtomicSMin64:
+ case IR::Opcode::GlobalAtomicUMin64:
+ case IR::Opcode::GlobalAtomicSMax64:
+ case IR::Opcode::GlobalAtomicUMax64:
+ case IR::Opcode::GlobalAtomicAnd64:
+ case IR::Opcode::GlobalAtomicOr64:
+ case IR::Opcode::GlobalAtomicXor64:
+ case IR::Opcode::GlobalAtomicExchange64:
+ case IR::Opcode::StorageAtomicIAdd64:
+ case IR::Opcode::StorageAtomicSMin64:
+ case IR::Opcode::StorageAtomicUMin64:
+ case IR::Opcode::StorageAtomicSMax64:
+ case IR::Opcode::StorageAtomicUMax64:
+ case IR::Opcode::StorageAtomicAnd64:
+ case IR::Opcode::StorageAtomicOr64:
+ case IR::Opcode::StorageAtomicXor64:
+ info.used_storage_buffer_types |= IR::Type::U64;
+ info.uses_int64_bit_atomics = true;
+ break;
+ case IR::Opcode::BindlessImageAtomicIAdd32:
+ case IR::Opcode::BindlessImageAtomicSMin32:
+ case IR::Opcode::BindlessImageAtomicUMin32:
+ case IR::Opcode::BindlessImageAtomicSMax32:
+ case IR::Opcode::BindlessImageAtomicUMax32:
+ case IR::Opcode::BindlessImageAtomicInc32:
+ case IR::Opcode::BindlessImageAtomicDec32:
+ case IR::Opcode::BindlessImageAtomicAnd32:
+ case IR::Opcode::BindlessImageAtomicOr32:
+ case IR::Opcode::BindlessImageAtomicXor32:
+ case IR::Opcode::BindlessImageAtomicExchange32:
+ case IR::Opcode::BoundImageAtomicIAdd32:
+ case IR::Opcode::BoundImageAtomicSMin32:
+ case IR::Opcode::BoundImageAtomicUMin32:
+ case IR::Opcode::BoundImageAtomicSMax32:
+ case IR::Opcode::BoundImageAtomicUMax32:
+ case IR::Opcode::BoundImageAtomicInc32:
+ case IR::Opcode::BoundImageAtomicDec32:
+ case IR::Opcode::BoundImageAtomicAnd32:
+ case IR::Opcode::BoundImageAtomicOr32:
+ case IR::Opcode::BoundImageAtomicXor32:
+ case IR::Opcode::BoundImageAtomicExchange32:
+ case IR::Opcode::ImageAtomicIAdd32:
+ case IR::Opcode::ImageAtomicSMin32:
+ case IR::Opcode::ImageAtomicUMin32:
+ case IR::Opcode::ImageAtomicSMax32:
+ case IR::Opcode::ImageAtomicUMax32:
+ case IR::Opcode::ImageAtomicInc32:
+ case IR::Opcode::ImageAtomicDec32:
+ case IR::Opcode::ImageAtomicAnd32:
+ case IR::Opcode::ImageAtomicOr32:
+ case IR::Opcode::ImageAtomicXor32:
+ case IR::Opcode::ImageAtomicExchange32:
+ info.uses_atomic_image_u32 = true;
+ break;
+ default:
+ break;
+ }
+}
+
+void VisitFpModifiers(Info& info, IR::Inst& inst) {
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::FPAdd16:
+ case IR::Opcode::FPFma16:
+ case IR::Opcode::FPMul16:
+ case IR::Opcode::FPRoundEven16:
+ case IR::Opcode::FPFloor16:
+ case IR::Opcode::FPCeil16:
+ case IR::Opcode::FPTrunc16: {
+ const auto control{inst.Flags<IR::FpControl>()};
+ switch (control.fmz_mode) {
+ case IR::FmzMode::DontCare:
+ break;
+ case IR::FmzMode::FTZ:
+ case IR::FmzMode::FMZ:
+ info.uses_fp16_denorms_flush = true;
+ break;
+ case IR::FmzMode::None:
+ info.uses_fp16_denorms_preserve = true;
+ break;
+ }
+ break;
+ }
+ case IR::Opcode::FPAdd32:
+ case IR::Opcode::FPFma32:
+ case IR::Opcode::FPMul32:
+ case IR::Opcode::FPRoundEven32:
+ case IR::Opcode::FPFloor32:
+ case IR::Opcode::FPCeil32:
+ case IR::Opcode::FPTrunc32:
+ case IR::Opcode::FPOrdEqual32:
+ case IR::Opcode::FPUnordEqual32:
+ case IR::Opcode::FPOrdNotEqual32:
+ case IR::Opcode::FPUnordNotEqual32:
+ case IR::Opcode::FPOrdLessThan32:
+ case IR::Opcode::FPUnordLessThan32:
+ case IR::Opcode::FPOrdGreaterThan32:
+ case IR::Opcode::FPUnordGreaterThan32:
+ case IR::Opcode::FPOrdLessThanEqual32:
+ case IR::Opcode::FPUnordLessThanEqual32:
+ case IR::Opcode::FPOrdGreaterThanEqual32:
+ case IR::Opcode::FPUnordGreaterThanEqual32:
+ case IR::Opcode::ConvertF16F32:
+ case IR::Opcode::ConvertF64F32: {
+ const auto control{inst.Flags<IR::FpControl>()};
+ switch (control.fmz_mode) {
+ case IR::FmzMode::DontCare:
+ break;
+ case IR::FmzMode::FTZ:
+ case IR::FmzMode::FMZ:
+ info.uses_fp32_denorms_flush = true;
+ break;
+ case IR::FmzMode::None:
+ info.uses_fp32_denorms_preserve = true;
+ break;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+}
+
+void VisitCbufs(Info& info, IR::Inst& inst) {
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::GetCbufU8:
+ case IR::Opcode::GetCbufS8:
+ case IR::Opcode::GetCbufU16:
+ case IR::Opcode::GetCbufS16:
+ case IR::Opcode::GetCbufU32:
+ case IR::Opcode::GetCbufF32:
+ case IR::Opcode::GetCbufU32x2: {
+ CheckCBufNVN(info, inst);
+ break;
+ }
+ default:
+ break;
+ }
+}
+
+void Visit(Info& info, IR::Inst& inst) {
+ VisitUsages(info, inst);
+ VisitFpModifiers(info, inst);
+ VisitCbufs(info, inst);
+}
+
+void GatherInfoFromHeader(Environment& env, Info& info) {
+ Stage stage{env.ShaderStage()};
+ if (stage == Stage::Compute) {
+ return;
+ }
+ const auto& header{env.SPH()};
+ if (stage == Stage::Fragment) {
+ if (!info.loads_indexed_attributes) {
+ return;
+ }
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ const size_t offset{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4};
+ const auto vector{header.ps.imap_generic_vector[index]};
+ info.loads.mask[offset + 0] = vector.x != PixelImap::Unused;
+ info.loads.mask[offset + 1] = vector.y != PixelImap::Unused;
+ info.loads.mask[offset + 2] = vector.z != PixelImap::Unused;
+ info.loads.mask[offset + 3] = vector.w != PixelImap::Unused;
+ }
+ return;
+ }
+ if (info.loads_indexed_attributes) {
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ const IR::Attribute attribute{IR::Attribute::Generic0X + index * 4};
+ const auto mask{header.vtg.InputGeneric(index)};
+ for (size_t i = 0; i < 4; ++i) {
+ info.loads.Set(attribute + i, mask[i]);
+ }
+ }
+ for (size_t index = 0; index < 8; ++index) {
+ const u16 mask{header.vtg.clip_distances};
+ info.loads.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0);
+ }
+ info.loads.Set(IR::Attribute::PrimitiveId, header.vtg.imap_systemb.primitive_array_id != 0);
+ info.loads.Set(IR::Attribute::Layer, header.vtg.imap_systemb.rt_array_index != 0);
+ info.loads.Set(IR::Attribute::ViewportIndex, header.vtg.imap_systemb.viewport_index != 0);
+ info.loads.Set(IR::Attribute::PointSize, header.vtg.imap_systemb.point_size != 0);
+ info.loads.Set(IR::Attribute::PositionX, header.vtg.imap_systemb.position_x != 0);
+ info.loads.Set(IR::Attribute::PositionY, header.vtg.imap_systemb.position_y != 0);
+ info.loads.Set(IR::Attribute::PositionZ, header.vtg.imap_systemb.position_z != 0);
+ info.loads.Set(IR::Attribute::PositionW, header.vtg.imap_systemb.position_w != 0);
+ info.loads.Set(IR::Attribute::PointSpriteS, header.vtg.point_sprite_s != 0);
+ info.loads.Set(IR::Attribute::PointSpriteT, header.vtg.point_sprite_t != 0);
+ info.loads.Set(IR::Attribute::FogCoordinate, header.vtg.fog_coordinate != 0);
+ info.loads.Set(IR::Attribute::TessellationEvaluationPointU,
+ header.vtg.tessellation_eval_point_u != 0);
+ info.loads.Set(IR::Attribute::TessellationEvaluationPointV,
+ header.vtg.tessellation_eval_point_v != 0);
+ info.loads.Set(IR::Attribute::InstanceId, header.vtg.instance_id != 0);
+ info.loads.Set(IR::Attribute::VertexId, header.vtg.vertex_id != 0);
+ // TODO: Legacy varyings
+ }
+ if (info.stores_indexed_attributes) {
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ const IR::Attribute attribute{IR::Attribute::Generic0X + index * 4};
+ const auto mask{header.vtg.OutputGeneric(index)};
+ for (size_t i = 0; i < 4; ++i) {
+ info.stores.Set(attribute + i, mask[i]);
+ }
+ }
+ for (size_t index = 0; index < 8; ++index) {
+ const u16 mask{header.vtg.omap_systemc.clip_distances};
+ info.stores.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0);
+ }
+ info.stores.Set(IR::Attribute::PrimitiveId,
+ header.vtg.omap_systemb.primitive_array_id != 0);
+ info.stores.Set(IR::Attribute::Layer, header.vtg.omap_systemb.rt_array_index != 0);
+ info.stores.Set(IR::Attribute::ViewportIndex, header.vtg.omap_systemb.viewport_index != 0);
+ info.stores.Set(IR::Attribute::PointSize, header.vtg.omap_systemb.point_size != 0);
+ info.stores.Set(IR::Attribute::PositionX, header.vtg.omap_systemb.position_x != 0);
+ info.stores.Set(IR::Attribute::PositionY, header.vtg.omap_systemb.position_y != 0);
+ info.stores.Set(IR::Attribute::PositionZ, header.vtg.omap_systemb.position_z != 0);
+ info.stores.Set(IR::Attribute::PositionW, header.vtg.omap_systemb.position_w != 0);
+ info.stores.Set(IR::Attribute::PointSpriteS, header.vtg.omap_systemc.point_sprite_s != 0);
+ info.stores.Set(IR::Attribute::PointSpriteT, header.vtg.omap_systemc.point_sprite_t != 0);
+ info.stores.Set(IR::Attribute::FogCoordinate, header.vtg.omap_systemc.fog_coordinate != 0);
+ info.stores.Set(IR::Attribute::TessellationEvaluationPointU,
+ header.vtg.omap_systemc.tessellation_eval_point_u != 0);
+ info.stores.Set(IR::Attribute::TessellationEvaluationPointV,
+ header.vtg.omap_systemc.tessellation_eval_point_v != 0);
+ info.stores.Set(IR::Attribute::InstanceId, header.vtg.omap_systemc.instance_id != 0);
+ info.stores.Set(IR::Attribute::VertexId, header.vtg.omap_systemc.vertex_id != 0);
+ // TODO: Legacy varyings
+ }
+}
+} // Anonymous namespace
+
+void CollectShaderInfoPass(Environment& env, IR::Program& program) {
+ Info& info{program.info};
+ const u32 base{[&] {
+ switch (program.stage) {
+ case Stage::VertexA:
+ case Stage::VertexB:
+ return 0x110u;
+ case Stage::TessellationControl:
+ return 0x210u;
+ case Stage::TessellationEval:
+ return 0x310u;
+ case Stage::Geometry:
+ return 0x410u;
+ case Stage::Fragment:
+ return 0x510u;
+ case Stage::Compute:
+ return 0x310u;
+ }
+ throw InvalidArgument("Invalid stage {}", program.stage);
+ }()};
+ info.nvn_buffer_base = base;
+
+ for (IR::Block* const block : program.post_order_blocks) {
+ for (IR::Inst& inst : block->Instructions()) {
+ Visit(info, inst);
+ }
+ }
+ GatherInfoFromHeader(env, info);
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
new file mode 100644
index 000000000..8dd6d6c2c
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -0,0 +1,610 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <tuple>
+#include <type_traits>
+
+#include "common/bit_cast.h"
+#include "common/bit_util.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+namespace {
+// Metaprogramming helpers to extract argument type information from a lambda
+template <typename Func>
+struct LambdaTraits : LambdaTraits<decltype(&std::remove_reference_t<Func>::operator())> {};
+
+template <typename ReturnType, typename LambdaType, typename... Args>
+struct LambdaTraits<ReturnType (LambdaType::*)(Args...) const> {
+ template <size_t I>
+ using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
+
+ static constexpr size_t NUM_ARGS{sizeof...(Args)};
+};
+
+template <typename T>
+[[nodiscard]] T Arg(const IR::Value& value) {
+ if constexpr (std::is_same_v<T, bool>) {
+ return value.U1();
+ } else if constexpr (std::is_same_v<T, u32>) {
+ return value.U32();
+ } else if constexpr (std::is_same_v<T, s32>) {
+ return static_cast<s32>(value.U32());
+ } else if constexpr (std::is_same_v<T, f32>) {
+ return value.F32();
+ } else if constexpr (std::is_same_v<T, u64>) {
+ return value.U64();
+ }
+}
+
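+// Fold a commutative operation. When both operands are immediates the instruction is replaced
+// with the folded result and false is returned; otherwise immediates are normalized to the
+// right-hand side, chained immediates are combined, and true is returned.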
+template <typename T, typename ImmFn>
+bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) {
+ const IR::Value lhs{inst.Arg(0)};
+ const IR::Value rhs{inst.Arg(1)};
+
+ const bool is_lhs_immediate{lhs.IsImmediate()};
+ const bool is_rhs_immediate{rhs.IsImmediate()};
+
+ if (is_lhs_immediate && is_rhs_immediate) {
+ const auto result{imm_fn(Arg<T>(lhs), Arg<T>(rhs))};
+ inst.ReplaceUsesWith(IR::Value{result});
+ return false;
+ }
+ if (is_lhs_immediate && !is_rhs_immediate) {
+ IR::Inst* const rhs_inst{rhs.InstRecursive()};
+ if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->Arg(1).IsImmediate()) {
+ const auto combined{imm_fn(Arg<T>(lhs), Arg<T>(rhs_inst->Arg(1)))};
+ inst.SetArg(0, rhs_inst->Arg(0));
+ inst.SetArg(1, IR::Value{combined});
+ } else {
+ // Normalize
+ inst.SetArg(0, rhs);
+ inst.SetArg(1, lhs);
+ }
+ }
+ if (!is_lhs_immediate && is_rhs_immediate) {
+ const IR::Inst* const lhs_inst{lhs.InstRecursive()};
+ if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->Arg(1).IsImmediate()) {
+ const auto combined{imm_fn(Arg<T>(rhs), Arg<T>(lhs_inst->Arg(1)))};
+ inst.SetArg(0, lhs_inst->Arg(0));
+ inst.SetArg(1, IR::Value{combined});
+ }
+ }
+ return true;
+}
+
+template <typename Func>
+bool FoldWhenAllImmediates(IR::Inst& inst, Func&& func) {
+ if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) {
+ return false;
+ }
+ using Indices = std::make_index_sequence<LambdaTraits<decltype(func)>::NUM_ARGS>;
+ inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{}));
+ return true;
+}
+
+void FoldGetRegister(IR::Inst& inst) {
+ if (inst.Arg(0).Reg() == IR::Reg::RZ) {
+ inst.ReplaceUsesWith(IR::Value{u32{0}});
+ }
+}
+
+void FoldGetPred(IR::Inst& inst) {
+ if (inst.Arg(0).Pred() == IR::Pred::PT) {
+ inst.ReplaceUsesWith(IR::Value{true});
+ }
+}
+
+/// Replaces the pattern generated by two XMAD multiplications
+bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
+ /*
+ * We are looking for this pattern:
+ * %rhs_bfe = BitFieldUExtract %factor_a, #0, #16
+ * %rhs_mul = IMul32 %rhs_bfe, %factor_b
+ * %lhs_bfe = BitFieldUExtract %factor_a, #16, #16
+ * %lhs_mul = IMul32 %lhs_bfe, %factor_b
+ * %lhs_shl = ShiftLeftLogical32 %lhs_mul, #16
+ * %result = IAdd32 %lhs_shl, %rhs_mul
+ *
+ * And replacing it with
+ * %result = IMul32 %factor_a, %factor_b
+ *
+ * LLVM and MSVC perform this same optimization, so it is considered safe.
+ */
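+ // Illustrative sanity check of the replacement (values chosen only for this example): with
+ // factor_a = 0x0002'0003 and factor_b = 5,
+ //   rhs_mul = (factor_a & 0xFFFF) * factor_b        = 3 * 5         = 15
+ //   lhs_shl = ((factor_a >> 16) * factor_b) << 16   = (2 * 5) << 16 = 0x000A'0000
+ //   result  = lhs_shl + rhs_mul                     = 0x000A'000F   = factor_a * factor_b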
+ const IR::Value lhs_arg{inst.Arg(0)};
+ const IR::Value rhs_arg{inst.Arg(1)};
+ if (lhs_arg.IsImmediate() || rhs_arg.IsImmediate()) {
+ return false;
+ }
+ IR::Inst* const lhs_shl{lhs_arg.InstRecursive()};
+ if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 ||
+ lhs_shl->Arg(1) != IR::Value{16U}) {
+ return false;
+ }
+ if (lhs_shl->Arg(0).IsImmediate()) {
+ return false;
+ }
+ IR::Inst* const lhs_mul{lhs_shl->Arg(0).InstRecursive()};
+ IR::Inst* const rhs_mul{rhs_arg.InstRecursive()};
+ if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 || rhs_mul->GetOpcode() != IR::Opcode::IMul32) {
+ return false;
+ }
+ if (lhs_mul->Arg(1).Resolve() != rhs_mul->Arg(1).Resolve()) {
+ return false;
+ }
+ const IR::U32 factor_b{lhs_mul->Arg(1)};
+ if (lhs_mul->Arg(0).IsImmediate() || rhs_mul->Arg(0).IsImmediate()) {
+ return false;
+ }
+ IR::Inst* const lhs_bfe{lhs_mul->Arg(0).InstRecursive()};
+ IR::Inst* const rhs_bfe{rhs_mul->Arg(0).InstRecursive()};
+ if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) {
+ return false;
+ }
+ if (rhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) {
+ return false;
+ }
+ if (lhs_bfe->Arg(1) != IR::Value{16U} || lhs_bfe->Arg(2) != IR::Value{16U}) {
+ return false;
+ }
+ if (rhs_bfe->Arg(1) != IR::Value{0U} || rhs_bfe->Arg(2) != IR::Value{16U}) {
+ return false;
+ }
+ if (lhs_bfe->Arg(0).Resolve() != rhs_bfe->Arg(0).Resolve()) {
+ return false;
+ }
+ const IR::U32 factor_a{lhs_bfe->Arg(0)};
+ IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+ inst.ReplaceUsesWith(ir.IMul(factor_a, factor_b));
+ return true;
+}
+
+template <typename T>
+void FoldAdd(IR::Block& block, IR::Inst& inst) {
+ if (inst.HasAssociatedPseudoOperation()) {
+ return;
+ }
+ if (!FoldCommutative<T>(inst, [](T a, T b) { return a + b; })) {
+ return;
+ }
+ const IR::Value rhs{inst.Arg(1)};
+ if (rhs.IsImmediate() && Arg<T>(rhs) == 0) {
+ inst.ReplaceUsesWith(inst.Arg(0));
+ return;
+ }
+ if constexpr (std::is_same_v<T, u32>) {
+ if (FoldXmadMultiply(block, inst)) {
+ return;
+ }
+ }
+}
+
+void FoldISub32(IR::Inst& inst) {
+ if (FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a - b; })) {
+ return;
+ }
+ if (inst.Arg(0).IsImmediate() || inst.Arg(1).IsImmediate()) {
+ return;
+ }
+    // ISub32 is generally used to subtract two constant buffer reads; compare them and replace the
+    // result with zero when they are equal.
+ const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) {
+ return a->GetOpcode() == IR::Opcode::GetCbufU32 &&
+ b->GetOpcode() == IR::Opcode::GetCbufU32 && a->Arg(0) == b->Arg(0) &&
+ a->Arg(1) == b->Arg(1);
+ }};
+ IR::Inst* op_a{inst.Arg(0).InstRecursive()};
+ IR::Inst* op_b{inst.Arg(1).InstRecursive()};
+ if (equal_cbuf(op_a, op_b)) {
+ inst.ReplaceUsesWith(IR::Value{u32{0}});
+ return;
+ }
+ // It's also possible a value is being added to a cbuf and then subtracted
+ if (op_b->GetOpcode() == IR::Opcode::IAdd32) {
+ // Canonicalize local variables to simplify the following logic
+ std::swap(op_a, op_b);
+ }
+ if (op_b->GetOpcode() != IR::Opcode::GetCbufU32) {
+ return;
+ }
+ IR::Inst* const inst_cbuf{op_b};
+ if (op_a->GetOpcode() != IR::Opcode::IAdd32) {
+ return;
+ }
+ IR::Value add_op_a{op_a->Arg(0)};
+ IR::Value add_op_b{op_a->Arg(1)};
+ if (add_op_b.IsImmediate()) {
+        // Canonicalize: keep the non-immediate operand in add_op_b
+ std::swap(add_op_a, add_op_b);
+ }
+ if (add_op_b.IsImmediate()) {
+ return;
+ }
+ IR::Inst* const add_cbuf{add_op_b.InstRecursive()};
+ if (equal_cbuf(add_cbuf, inst_cbuf)) {
+ inst.ReplaceUsesWith(add_op_a);
+ }
+}
+
+void FoldSelect(IR::Inst& inst) {
+ const IR::Value cond{inst.Arg(0)};
+ if (cond.IsImmediate()) {
+ inst.ReplaceUsesWith(cond.U1() ? inst.Arg(1) : inst.Arg(2));
+ }
+}
+
+void FoldFPMul32(IR::Inst& inst) {
+ const auto control{inst.Flags<IR::FpControl>()};
+ if (control.no_contraction) {
+ return;
+ }
+    // Fold the interpolation pattern (x * attribute) * (1 / attribute) -> x
+ const IR::Value lhs_value{inst.Arg(0)};
+ const IR::Value rhs_value{inst.Arg(1)};
+ if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) {
+ return;
+ }
+ IR::Inst* const lhs_op{lhs_value.InstRecursive()};
+ IR::Inst* const rhs_op{rhs_value.InstRecursive()};
+ if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 ||
+ rhs_op->GetOpcode() != IR::Opcode::FPRecip32) {
+ return;
+ }
+ const IR::Value recip_source{rhs_op->Arg(0)};
+ const IR::Value lhs_mul_source{lhs_op->Arg(1).Resolve()};
+ if (recip_source.IsImmediate() || lhs_mul_source.IsImmediate()) {
+ return;
+ }
+ IR::Inst* const attr_a{recip_source.InstRecursive()};
+ IR::Inst* const attr_b{lhs_mul_source.InstRecursive()};
+ if (attr_a->GetOpcode() != IR::Opcode::GetAttribute ||
+ attr_b->GetOpcode() != IR::Opcode::GetAttribute) {
+ return;
+ }
+ if (attr_a->Arg(0).Attribute() == attr_b->Arg(0).Attribute()) {
+ inst.ReplaceUsesWith(lhs_op->Arg(0));
+ }
+}
+
+void FoldLogicalAnd(IR::Inst& inst) {
+ if (!FoldCommutative<bool>(inst, [](bool a, bool b) { return a && b; })) {
+ return;
+ }
+ const IR::Value rhs{inst.Arg(1)};
+ if (rhs.IsImmediate()) {
+ if (rhs.U1()) {
+ inst.ReplaceUsesWith(inst.Arg(0));
+ } else {
+ inst.ReplaceUsesWith(IR::Value{false});
+ }
+ }
+}
+
+void FoldLogicalOr(IR::Inst& inst) {
+ if (!FoldCommutative<bool>(inst, [](bool a, bool b) { return a || b; })) {
+ return;
+ }
+ const IR::Value rhs{inst.Arg(1)};
+ if (rhs.IsImmediate()) {
+ if (rhs.U1()) {
+ inst.ReplaceUsesWith(IR::Value{true});
+ } else {
+ inst.ReplaceUsesWith(inst.Arg(0));
+ }
+ }
+}
+
+void FoldLogicalNot(IR::Inst& inst) {
+ const IR::U1 value{inst.Arg(0)};
+ if (value.IsImmediate()) {
+ inst.ReplaceUsesWith(IR::Value{!value.U1()});
+ return;
+ }
+ IR::Inst* const arg{value.InstRecursive()};
+ if (arg->GetOpcode() == IR::Opcode::LogicalNot) {
+ inst.ReplaceUsesWith(arg->Arg(0));
+ }
+}
+
+template <IR::Opcode op, typename Dest, typename Source>
+void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
+ const IR::Value value{inst.Arg(0)};
+ if (value.IsImmediate()) {
+ inst.ReplaceUsesWith(IR::Value{Common::BitCast<Dest>(Arg<Source>(value))});
+ return;
+ }
+ IR::Inst* const arg_inst{value.InstRecursive()};
+ if (arg_inst->GetOpcode() == reverse) {
+ inst.ReplaceUsesWith(arg_inst->Arg(0));
+ return;
+ }
+ if constexpr (op == IR::Opcode::BitCastF32U32) {
+ if (arg_inst->GetOpcode() == IR::Opcode::GetCbufU32) {
+ // Replace the bitcast with a typed constant buffer read
+ inst.ReplaceOpcode(IR::Opcode::GetCbufF32);
+ inst.SetArg(0, arg_inst->Arg(0));
+ inst.SetArg(1, arg_inst->Arg(1));
+ return;
+ }
+ }
+}
+
+void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) {
+ const IR::Value value{inst.Arg(0)};
+ if (value.IsImmediate()) {
+ return;
+ }
+ IR::Inst* const arg_inst{value.InstRecursive()};
+ if (arg_inst->GetOpcode() == reverse) {
+ inst.ReplaceUsesWith(arg_inst->Arg(0));
+ return;
+ }
+}
+
+template <typename Func, size_t... I>
+IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence<I...>) {
+ using Traits = LambdaTraits<decltype(func)>;
+ return IR::Value{func(Arg<typename Traits::template ArgType<I>>(inst.Arg(I))...)};
+}
+
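+/// Walks a chain of CompositeInsert instructions looking for the requested element: returns the
+/// inserted value when the index matches, or the matching element of the original CompositeConstruct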
+std::optional<IR::Value> FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert,
+ IR::Opcode construct, u32 first_index) {
+ IR::Inst* const inst{inst_value.InstRecursive()};
+ if (inst->GetOpcode() == construct) {
+ return inst->Arg(first_index);
+ }
+ if (inst->GetOpcode() != insert) {
+ return std::nullopt;
+ }
+ IR::Value value_index{inst->Arg(2)};
+ if (!value_index.IsImmediate()) {
+ return std::nullopt;
+ }
+ const u32 second_index{value_index.U32()};
+ if (first_index != second_index) {
+ IR::Value value_composite{inst->Arg(0)};
+ if (value_composite.IsImmediate()) {
+ return std::nullopt;
+ }
+ return FoldCompositeExtractImpl(value_composite, insert, construct, first_index);
+ }
+ return inst->Arg(1);
+}
+
+void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode insert) {
+ const IR::Value value_1{inst.Arg(0)};
+ const IR::Value value_2{inst.Arg(1)};
+ if (value_1.IsImmediate()) {
+ return;
+ }
+ if (!value_2.IsImmediate()) {
+ return;
+ }
+ const u32 first_index{value_2.U32()};
+ const std::optional result{FoldCompositeExtractImpl(value_1, insert, construct, first_index)};
+ if (!result) {
+ return;
+ }
+ inst.ReplaceUsesWith(*result);
+}
+
+IR::Value GetThroughCast(IR::Value value, IR::Opcode expected_cast) {
+ if (value.IsImmediate()) {
+ return value;
+ }
+ IR::Inst* const inst{value.InstRecursive()};
+ if (inst->GetOpcode() == expected_cast) {
+ return inst->Arg(0).Resolve();
+ }
+ return value;
+}
+
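+/// Replaces the FSwizzleAdd + ShuffleButterfly idiom used for fine derivatives with the
+/// DPdxFine/DPdyFine instructions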
+void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
+ const IR::Value swizzle{inst.Arg(2)};
+ if (!swizzle.IsImmediate()) {
+ return;
+ }
+ const IR::Value value_1{GetThroughCast(inst.Arg(0).Resolve(), IR::Opcode::BitCastF32U32)};
+ const IR::Value value_2{GetThroughCast(inst.Arg(1).Resolve(), IR::Opcode::BitCastF32U32)};
+ if (value_1.IsImmediate()) {
+ return;
+ }
+ const u32 swizzle_value{swizzle.U32()};
+ if (swizzle_value != 0x99 && swizzle_value != 0xA5) {
+ return;
+ }
+ IR::Inst* const inst2{value_1.InstRecursive()};
+ if (inst2->GetOpcode() != IR::Opcode::ShuffleButterfly) {
+ return;
+ }
+ const IR::Value value_3{GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32)};
+ if (value_2 != value_3) {
+ return;
+ }
+ const IR::Value index{inst2->Arg(1)};
+ const IR::Value clamp{inst2->Arg(2)};
+ const IR::Value segmentation_mask{inst2->Arg(3)};
+ if (!index.IsImmediate() || !clamp.IsImmediate() || !segmentation_mask.IsImmediate()) {
+ return;
+ }
+ if (clamp.U32() != 3 || segmentation_mask.U32() != 28) {
+ return;
+ }
+ if (swizzle_value == 0x99) {
+ // DPdxFine
+ if (index.U32() == 1) {
+ IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+ inst.ReplaceUsesWith(ir.DPdxFine(IR::F32{inst.Arg(1)}));
+ }
+ } else if (swizzle_value == 0xA5) {
+ // DPdyFine
+ if (index.U32() == 2) {
+ IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+ inst.ReplaceUsesWith(ir.DPdyFine(IR::F32{inst.Arg(1)}));
+ }
+ }
+}
+
+void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::GetRegister:
+ return FoldGetRegister(inst);
+ case IR::Opcode::GetPred:
+ return FoldGetPred(inst);
+ case IR::Opcode::IAdd32:
+ return FoldAdd<u32>(block, inst);
+ case IR::Opcode::ISub32:
+ return FoldISub32(inst);
+ case IR::Opcode::IMul32:
+ FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; });
+ return;
+ case IR::Opcode::ShiftRightArithmetic32:
+ FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return static_cast<u32>(a >> b); });
+ return;
+ case IR::Opcode::BitCastF32U32:
+ return FoldBitCast<IR::Opcode::BitCastF32U32, f32, u32>(inst, IR::Opcode::BitCastU32F32);
+ case IR::Opcode::BitCastU32F32:
+ return FoldBitCast<IR::Opcode::BitCastU32F32, u32, f32>(inst, IR::Opcode::BitCastF32U32);
+ case IR::Opcode::IAdd64:
+ return FoldAdd<u64>(block, inst);
+ case IR::Opcode::PackHalf2x16:
+ return FoldInverseFunc(inst, IR::Opcode::UnpackHalf2x16);
+ case IR::Opcode::UnpackHalf2x16:
+ return FoldInverseFunc(inst, IR::Opcode::PackHalf2x16);
+ case IR::Opcode::SelectU1:
+ case IR::Opcode::SelectU8:
+ case IR::Opcode::SelectU16:
+ case IR::Opcode::SelectU32:
+ case IR::Opcode::SelectU64:
+ case IR::Opcode::SelectF16:
+ case IR::Opcode::SelectF32:
+ case IR::Opcode::SelectF64:
+ return FoldSelect(inst);
+ case IR::Opcode::FPMul32:
+ return FoldFPMul32(inst);
+ case IR::Opcode::LogicalAnd:
+ return FoldLogicalAnd(inst);
+ case IR::Opcode::LogicalOr:
+ return FoldLogicalOr(inst);
+ case IR::Opcode::LogicalNot:
+ return FoldLogicalNot(inst);
+ case IR::Opcode::SLessThan:
+ FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; });
+ return;
+ case IR::Opcode::ULessThan:
+ FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; });
+ return;
+ case IR::Opcode::SLessThanEqual:
+ FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a <= b; });
+ return;
+ case IR::Opcode::ULessThanEqual:
+ FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a <= b; });
+ return;
+ case IR::Opcode::SGreaterThan:
+ FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a > b; });
+ return;
+ case IR::Opcode::UGreaterThan:
+ FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a > b; });
+ return;
+ case IR::Opcode::SGreaterThanEqual:
+ FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a >= b; });
+ return;
+ case IR::Opcode::UGreaterThanEqual:
+ FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a >= b; });
+ return;
+ case IR::Opcode::IEqual:
+ FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a == b; });
+ return;
+ case IR::Opcode::INotEqual:
+ FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a != b; });
+ return;
+ case IR::Opcode::BitwiseAnd32:
+ FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a & b; });
+ return;
+ case IR::Opcode::BitwiseOr32:
+ FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a | b; });
+ return;
+ case IR::Opcode::BitwiseXor32:
+ FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a ^ b; });
+ return;
+ case IR::Opcode::BitFieldUExtract:
+ FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) {
+ if (static_cast<size_t>(shift) + static_cast<size_t>(count) > 32) {
+ throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract,
+ base, shift, count);
+ }
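+            // Shift the field down to bit zero and mask it to 'count' bits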
+ return (base >> shift) & ((1U << count) - 1);
+ });
+ return;
+ case IR::Opcode::BitFieldSExtract:
+ FoldWhenAllImmediates(inst, [](s32 base, u32 shift, u32 count) {
+ const size_t back_shift{static_cast<size_t>(shift) + static_cast<size_t>(count)};
+ const size_t left_shift{32 - back_shift};
+ const size_t right_shift{static_cast<size_t>(32 - count)};
+ if (back_shift > 32 || left_shift >= 32 || right_shift >= 32) {
+ throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract,
+ base, shift, count);
+ }
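+            // Shift the field up to the sign bit, then arithmetic-shift back down to sign-extend it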
+ return static_cast<u32>((base << left_shift) >> right_shift);
+ });
+ return;
+ case IR::Opcode::BitFieldInsert:
+ FoldWhenAllImmediates(inst, [](u32 base, u32 insert, u32 offset, u32 bits) {
+ if (bits >= 32 || offset >= 32) {
+ throw LogicError("Undefined result in {}({}, {}, {}, {})",
+ IR::Opcode::BitFieldInsert, base, insert, offset, bits);
+ }
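+            // ~(~0u << bits) is a mask of 'bits' ones; clear that field in 'base' at 'offset', then
+            // OR in the shifted 'insert' value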
+ return (base & ~(~(~0u << bits) << offset)) | (insert << offset);
+ });
+ return;
+ case IR::Opcode::CompositeExtractU32x2:
+ return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x2,
+ IR::Opcode::CompositeInsertU32x2);
+ case IR::Opcode::CompositeExtractU32x3:
+ return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x3,
+ IR::Opcode::CompositeInsertU32x3);
+ case IR::Opcode::CompositeExtractU32x4:
+ return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x4,
+ IR::Opcode::CompositeInsertU32x4);
+ case IR::Opcode::CompositeExtractF32x2:
+ return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x2,
+ IR::Opcode::CompositeInsertF32x2);
+ case IR::Opcode::CompositeExtractF32x3:
+ return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x3,
+ IR::Opcode::CompositeInsertF32x3);
+ case IR::Opcode::CompositeExtractF32x4:
+ return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x4,
+ IR::Opcode::CompositeInsertF32x4);
+ case IR::Opcode::CompositeExtractF16x2:
+ return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x2,
+ IR::Opcode::CompositeInsertF16x2);
+ case IR::Opcode::CompositeExtractF16x3:
+ return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x3,
+ IR::Opcode::CompositeInsertF16x3);
+ case IR::Opcode::CompositeExtractF16x4:
+ return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x4,
+ IR::Opcode::CompositeInsertF16x4);
+ case IR::Opcode::FSwizzleAdd:
+ return FoldFSwizzleAdd(block, inst);
+ default:
+ break;
+ }
+}
+} // Anonymous namespace
+
+void ConstantPropagationPass(IR::Program& program) {
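+    // Visit blocks in reverse post order so operands are generally simplified before their users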
+ const auto end{program.post_order_blocks.rend()};
+ for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) {
+ IR::Block* const block{*it};
+ for (IR::Inst& inst : block->Instructions()) {
+ ConstantPropagation(*block, inst);
+ }
+ }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp
new file mode 100644
index 000000000..400836301
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp
@@ -0,0 +1,26 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+
+void DeadCodeEliminationPass(IR::Program& program) {
+ // We iterate over the instructions in reverse order.
+ // This is because removing an instruction reduces the number of uses for earlier instructions.
+ for (IR::Block* const block : program.post_order_blocks) {
+ auto it{block->end()};
+ while (it != block->begin()) {
+ --it;
+ if (!it->HasUses() && !it->MayHaveSideEffects()) {
+ it->Invalidate();
+ it = block->Instructions().erase(it);
+ }
+ }
+ }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp
new file mode 100644
index 000000000..055ba9c54
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp
@@ -0,0 +1,30 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+
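+// The dual vertex passes strip the Epilogue from vertex A and the Prologue from vertex B so the
+// two stages can be merged into a single program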
+void VertexATransformPass(IR::Program& program) {
+ for (IR::Block* const block : program.blocks) {
+ for (IR::Inst& inst : block->Instructions()) {
+ if (inst.GetOpcode() == IR::Opcode::Epilogue) {
+ return inst.Invalidate();
+ }
+ }
+ }
+}
+
+void VertexBTransformPass(IR::Program& program) {
+ for (IR::Block* const block : program.blocks) {
+ for (IR::Inst& inst : block->Instructions()) {
+ if (inst.GetOpcode() == IR::Opcode::Prologue) {
+ return inst.Invalidate();
+ }
+ }
+ }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
new file mode 100644
index 000000000..4197b0095
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -0,0 +1,526 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <compare>
+#include <optional>
+#include <queue>
+
+#include <boost/container/flat_set.hpp>
+#include <boost/container/small_vector.hpp>
+
+#include "common/alignment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/breadth_first_search.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+namespace {
+/// Address in the constant buffers where a storage buffer descriptor is located
+struct StorageBufferAddr {
+ auto operator<=>(const StorageBufferAddr&) const noexcept = default;
+
+ u32 index;
+ u32 offset;
+};
+
+/// Block iterator to a global memory instruction and the storage buffer it uses
+struct StorageInst {
+ StorageBufferAddr storage_buffer;
+ IR::Inst* inst;
+ IR::Block* block;
+};
+
+/// Bias towards a certain range of constant buffers when looking for storage buffers
+struct Bias {
+ u32 index;
+ u32 offset_begin;
+ u32 offset_end;
+};
+
+using boost::container::flat_set;
+using boost::container::small_vector;
+using StorageBufferSet =
+ flat_set<StorageBufferAddr, std::less<StorageBufferAddr>, small_vector<StorageBufferAddr, 16>>;
+using StorageInstVector = small_vector<StorageInst, 24>;
+using StorageWritesSet =
+ flat_set<StorageBufferAddr, std::less<StorageBufferAddr>, small_vector<StorageBufferAddr, 16>>;
+
+struct StorageInfo {
+ StorageBufferSet set;
+ StorageInstVector to_replace;
+ StorageWritesSet writes;
+};
+
+/// Returns true when the instruction is a global memory instruction
+bool IsGlobalMemory(const IR::Inst& inst) {
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::LoadGlobalS8:
+ case IR::Opcode::LoadGlobalU8:
+ case IR::Opcode::LoadGlobalS16:
+ case IR::Opcode::LoadGlobalU16:
+ case IR::Opcode::LoadGlobal32:
+ case IR::Opcode::LoadGlobal64:
+ case IR::Opcode::LoadGlobal128:
+ case IR::Opcode::WriteGlobalS8:
+ case IR::Opcode::WriteGlobalU8:
+ case IR::Opcode::WriteGlobalS16:
+ case IR::Opcode::WriteGlobalU16:
+ case IR::Opcode::WriteGlobal32:
+ case IR::Opcode::WriteGlobal64:
+ case IR::Opcode::WriteGlobal128:
+ case IR::Opcode::GlobalAtomicIAdd32:
+ case IR::Opcode::GlobalAtomicSMin32:
+ case IR::Opcode::GlobalAtomicUMin32:
+ case IR::Opcode::GlobalAtomicSMax32:
+ case IR::Opcode::GlobalAtomicUMax32:
+ case IR::Opcode::GlobalAtomicInc32:
+ case IR::Opcode::GlobalAtomicDec32:
+ case IR::Opcode::GlobalAtomicAnd32:
+ case IR::Opcode::GlobalAtomicOr32:
+ case IR::Opcode::GlobalAtomicXor32:
+ case IR::Opcode::GlobalAtomicExchange32:
+ case IR::Opcode::GlobalAtomicIAdd64:
+ case IR::Opcode::GlobalAtomicSMin64:
+ case IR::Opcode::GlobalAtomicUMin64:
+ case IR::Opcode::GlobalAtomicSMax64:
+ case IR::Opcode::GlobalAtomicUMax64:
+ case IR::Opcode::GlobalAtomicAnd64:
+ case IR::Opcode::GlobalAtomicOr64:
+ case IR::Opcode::GlobalAtomicXor64:
+ case IR::Opcode::GlobalAtomicExchange64:
+ case IR::Opcode::GlobalAtomicAddF32:
+ case IR::Opcode::GlobalAtomicAddF16x2:
+ case IR::Opcode::GlobalAtomicAddF32x2:
+ case IR::Opcode::GlobalAtomicMinF16x2:
+ case IR::Opcode::GlobalAtomicMinF32x2:
+ case IR::Opcode::GlobalAtomicMaxF16x2:
+ case IR::Opcode::GlobalAtomicMaxF32x2:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// Returns true when the instruction writes to global memory
+bool IsGlobalMemoryWrite(const IR::Inst& inst) {
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::WriteGlobalS8:
+ case IR::Opcode::WriteGlobalU8:
+ case IR::Opcode::WriteGlobalS16:
+ case IR::Opcode::WriteGlobalU16:
+ case IR::Opcode::WriteGlobal32:
+ case IR::Opcode::WriteGlobal64:
+ case IR::Opcode::WriteGlobal128:
+ case IR::Opcode::GlobalAtomicIAdd32:
+ case IR::Opcode::GlobalAtomicSMin32:
+ case IR::Opcode::GlobalAtomicUMin32:
+ case IR::Opcode::GlobalAtomicSMax32:
+ case IR::Opcode::GlobalAtomicUMax32:
+ case IR::Opcode::GlobalAtomicInc32:
+ case IR::Opcode::GlobalAtomicDec32:
+ case IR::Opcode::GlobalAtomicAnd32:
+ case IR::Opcode::GlobalAtomicOr32:
+ case IR::Opcode::GlobalAtomicXor32:
+ case IR::Opcode::GlobalAtomicExchange32:
+ case IR::Opcode::GlobalAtomicIAdd64:
+ case IR::Opcode::GlobalAtomicSMin64:
+ case IR::Opcode::GlobalAtomicUMin64:
+ case IR::Opcode::GlobalAtomicSMax64:
+ case IR::Opcode::GlobalAtomicUMax64:
+ case IR::Opcode::GlobalAtomicAnd64:
+ case IR::Opcode::GlobalAtomicOr64:
+ case IR::Opcode::GlobalAtomicXor64:
+ case IR::Opcode::GlobalAtomicExchange64:
+ case IR::Opcode::GlobalAtomicAddF32:
+ case IR::Opcode::GlobalAtomicAddF16x2:
+ case IR::Opcode::GlobalAtomicAddF32x2:
+ case IR::Opcode::GlobalAtomicMinF16x2:
+ case IR::Opcode::GlobalAtomicMinF32x2:
+ case IR::Opcode::GlobalAtomicMaxF16x2:
+ case IR::Opcode::GlobalAtomicMaxF32x2:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// Converts a global memory opcode to its storage buffer equivalent
+IR::Opcode GlobalToStorage(IR::Opcode opcode) {
+ switch (opcode) {
+ case IR::Opcode::LoadGlobalS8:
+ return IR::Opcode::LoadStorageS8;
+ case IR::Opcode::LoadGlobalU8:
+ return IR::Opcode::LoadStorageU8;
+ case IR::Opcode::LoadGlobalS16:
+ return IR::Opcode::LoadStorageS16;
+ case IR::Opcode::LoadGlobalU16:
+ return IR::Opcode::LoadStorageU16;
+ case IR::Opcode::LoadGlobal32:
+ return IR::Opcode::LoadStorage32;
+ case IR::Opcode::LoadGlobal64:
+ return IR::Opcode::LoadStorage64;
+ case IR::Opcode::LoadGlobal128:
+ return IR::Opcode::LoadStorage128;
+ case IR::Opcode::WriteGlobalS8:
+ return IR::Opcode::WriteStorageS8;
+ case IR::Opcode::WriteGlobalU8:
+ return IR::Opcode::WriteStorageU8;
+ case IR::Opcode::WriteGlobalS16:
+ return IR::Opcode::WriteStorageS16;
+ case IR::Opcode::WriteGlobalU16:
+ return IR::Opcode::WriteStorageU16;
+ case IR::Opcode::WriteGlobal32:
+ return IR::Opcode::WriteStorage32;
+ case IR::Opcode::WriteGlobal64:
+ return IR::Opcode::WriteStorage64;
+ case IR::Opcode::WriteGlobal128:
+ return IR::Opcode::WriteStorage128;
+ case IR::Opcode::GlobalAtomicIAdd32:
+ return IR::Opcode::StorageAtomicIAdd32;
+ case IR::Opcode::GlobalAtomicSMin32:
+ return IR::Opcode::StorageAtomicSMin32;
+ case IR::Opcode::GlobalAtomicUMin32:
+ return IR::Opcode::StorageAtomicUMin32;
+ case IR::Opcode::GlobalAtomicSMax32:
+ return IR::Opcode::StorageAtomicSMax32;
+ case IR::Opcode::GlobalAtomicUMax32:
+ return IR::Opcode::StorageAtomicUMax32;
+ case IR::Opcode::GlobalAtomicInc32:
+ return IR::Opcode::StorageAtomicInc32;
+ case IR::Opcode::GlobalAtomicDec32:
+ return IR::Opcode::StorageAtomicDec32;
+ case IR::Opcode::GlobalAtomicAnd32:
+ return IR::Opcode::StorageAtomicAnd32;
+ case IR::Opcode::GlobalAtomicOr32:
+ return IR::Opcode::StorageAtomicOr32;
+ case IR::Opcode::GlobalAtomicXor32:
+ return IR::Opcode::StorageAtomicXor32;
+ case IR::Opcode::GlobalAtomicIAdd64:
+ return IR::Opcode::StorageAtomicIAdd64;
+ case IR::Opcode::GlobalAtomicSMin64:
+ return IR::Opcode::StorageAtomicSMin64;
+ case IR::Opcode::GlobalAtomicUMin64:
+ return IR::Opcode::StorageAtomicUMin64;
+ case IR::Opcode::GlobalAtomicSMax64:
+ return IR::Opcode::StorageAtomicSMax64;
+ case IR::Opcode::GlobalAtomicUMax64:
+ return IR::Opcode::StorageAtomicUMax64;
+ case IR::Opcode::GlobalAtomicAnd64:
+ return IR::Opcode::StorageAtomicAnd64;
+ case IR::Opcode::GlobalAtomicOr64:
+ return IR::Opcode::StorageAtomicOr64;
+ case IR::Opcode::GlobalAtomicXor64:
+ return IR::Opcode::StorageAtomicXor64;
+ case IR::Opcode::GlobalAtomicExchange32:
+ return IR::Opcode::StorageAtomicExchange32;
+ case IR::Opcode::GlobalAtomicExchange64:
+ return IR::Opcode::StorageAtomicExchange64;
+ case IR::Opcode::GlobalAtomicAddF32:
+ return IR::Opcode::StorageAtomicAddF32;
+ case IR::Opcode::GlobalAtomicAddF16x2:
+ return IR::Opcode::StorageAtomicAddF16x2;
+ case IR::Opcode::GlobalAtomicMinF16x2:
+ return IR::Opcode::StorageAtomicMinF16x2;
+ case IR::Opcode::GlobalAtomicMaxF16x2:
+ return IR::Opcode::StorageAtomicMaxF16x2;
+ case IR::Opcode::GlobalAtomicAddF32x2:
+ return IR::Opcode::StorageAtomicAddF32x2;
+ case IR::Opcode::GlobalAtomicMinF32x2:
+ return IR::Opcode::StorageAtomicMinF32x2;
+ case IR::Opcode::GlobalAtomicMaxF32x2:
+ return IR::Opcode::StorageAtomicMaxF32x2;
+ default:
+ throw InvalidArgument("Invalid global memory opcode {}", opcode);
+ }
+}
+
+/// Returns true when a storage buffer address satisfies a bias
+bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexcept {
+ return storage_buffer.index == bias.index && storage_buffer.offset >= bias.offset_begin &&
+ storage_buffer.offset < bias.offset_end;
+}
+
+struct LowAddrInfo {
+ IR::U32 value;
+ s32 imm_offset;
+};
+
+/// Tries to track the first 32-bits of a global memory instruction
+std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
+    // The first argument is the low-level GPU address accessed by the global memory instruction
+ const IR::Value addr{inst->Arg(0)};
+ if (addr.IsImmediate()) {
+ // Not much we can do if it's an immediate
+ return std::nullopt;
+ }
+    // This address is expected to be either a PackUint2x32, an IAdd64, or a CompositeConstructU32x2
+ IR::Inst* addr_inst{addr.InstRecursive()};
+ s32 imm_offset{0};
+ if (addr_inst->GetOpcode() == IR::Opcode::IAdd64) {
+        // If it's an IAdd64, get the immediate offset it applies and grab the address instruction.
+        // This expects the instruction to be canonicalized, with the address as the first argument
+        // and the immediate offset as the second.
+ const IR::U64 imm_offset_value{addr_inst->Arg(1)};
+ if (!imm_offset_value.IsImmediate()) {
+ return std::nullopt;
+ }
+ imm_offset = static_cast<s32>(static_cast<s64>(imm_offset_value.U64()));
+ const IR::U64 iadd_addr{addr_inst->Arg(0)};
+ if (iadd_addr.IsImmediate()) {
+ return std::nullopt;
+ }
+ addr_inst = iadd_addr.InstRecursive();
+ }
+ // With IAdd64 handled, now PackUint2x32 is expected
+ if (addr_inst->GetOpcode() == IR::Opcode::PackUint2x32) {
+ // PackUint2x32 is expected to be generated from a vector
+ const IR::Value vector{addr_inst->Arg(0)};
+ if (vector.IsImmediate()) {
+ return std::nullopt;
+ }
+ addr_inst = vector.InstRecursive();
+ }
+ // The vector is expected to be a CompositeConstructU32x2
+ if (addr_inst->GetOpcode() != IR::Opcode::CompositeConstructU32x2) {
+ return std::nullopt;
+ }
+ // Grab the first argument from the CompositeConstructU32x2, this is the low address.
+ return LowAddrInfo{
+ .value{IR::U32{addr_inst->Arg(0)}},
+ .imm_offset = imm_offset,
+ };
+}
+
+/// Tries to track the storage buffer address used by a global memory instruction
+std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) {
+ const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> {
+ if (inst->GetOpcode() != IR::Opcode::GetCbufU32) {
+ return std::nullopt;
+ }
+ const IR::Value index{inst->Arg(0)};
+ const IR::Value offset{inst->Arg(1)};
+ if (!index.IsImmediate()) {
+ // Definitely not a storage buffer if it's read from a
+ // non-immediate index
+ return std::nullopt;
+ }
+ if (!offset.IsImmediate()) {
+ // TODO: Support SSBO arrays
+ return std::nullopt;
+ }
+ const StorageBufferAddr storage_buffer{
+ .index = index.U32(),
+ .offset = offset.U32(),
+ };
+ if (!Common::IsAligned(storage_buffer.offset, 16)) {
+ // The SSBO pointer has to be aligned
+ return std::nullopt;
+ }
+ if (bias && !MeetsBias(storage_buffer, *bias)) {
+            // Reject addresses outside the biased range to avoid
+            // misidentifying them as storage buffers
+ return std::nullopt;
+ }
+ return storage_buffer;
+ }};
+ return BreadthFirstSearch(value, pred);
+}
+
+/// Collects the storage buffer used by a global memory instruction and the instruction itself
+void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) {
+    // NVN puts storage buffers in a specific range; we have to bias towards these addresses to
+    // avoid false positives
+ static constexpr Bias nvn_bias{
+ .index = 0,
+ .offset_begin = 0x110,
+ .offset_end = 0x610,
+ };
+ // Track the low address of the instruction
+ const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)};
+ if (!low_addr_info) {
+ // Failed to track the low address, use NVN fallbacks
+ return;
+ }
+ // First try to find storage buffers in the NVN address
+ const IR::U32 low_addr{low_addr_info->value};
+ std::optional<StorageBufferAddr> storage_buffer{Track(low_addr, &nvn_bias)};
+ if (!storage_buffer) {
+ // If it fails, track without a bias
+ storage_buffer = Track(low_addr, nullptr);
+ if (!storage_buffer) {
+ // If that also fails, use NVN fallbacks
+ return;
+ }
+ }
+ // Collect storage buffer and the instruction
+ if (IsGlobalMemoryWrite(inst)) {
+ info.writes.insert(*storage_buffer);
+ }
+ info.set.insert(*storage_buffer);
+ info.to_replace.push_back(StorageInst{
+ .storage_buffer{*storage_buffer},
+ .inst = &inst,
+ .block = &block,
+ });
+}
+
+/// Returns the byte offset for an equivalent storage buffer instruction
+IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) {
+ IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+ IR::U32 offset;
+ if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) {
+ offset = low_addr->value;
+ if (low_addr->imm_offset != 0) {
+ offset = ir.IAdd(offset, ir.Imm32(low_addr->imm_offset));
+ }
+ } else {
+ offset = ir.UConvert(32, IR::U64{inst.Arg(0)});
+ }
+    // Subtract the storage buffer base address (its least significant 32 bits, read from the
+    // constant buffer) from the guest address. The result is the storage buffer offset in bytes.
+ const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
+ return ir.ISub(offset, low_cbuf);
+}
+
+/// Replace a global memory load instruction with its storage buffer equivalent
+void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
+ const IR::U32& offset) {
+ const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())};
+ const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
+ const IR::Value value{&*block.PrependNewInst(it, new_opcode, {storage_index, offset})};
+ inst.ReplaceUsesWith(value);
+}
+
+/// Replace a global memory write instruction with its storage buffer equivalent
+void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
+ const IR::U32& offset) {
+ const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())};
+ const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
+ block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)});
+ inst.Invalidate();
+}
+
+/// Replace an atomic operation on global memory instruction with its storage buffer equivalent
+void ReplaceAtomic(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
+ const IR::U32& offset) {
+ const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())};
+ const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
+ const IR::Value value{
+ &*block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)})};
+ inst.ReplaceUsesWith(value);
+}
+
+/// Replace a global memory instruction with its storage buffer equivalent
+void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
+ const IR::U32& offset) {
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::LoadGlobalS8:
+ case IR::Opcode::LoadGlobalU8:
+ case IR::Opcode::LoadGlobalS16:
+ case IR::Opcode::LoadGlobalU16:
+ case IR::Opcode::LoadGlobal32:
+ case IR::Opcode::LoadGlobal64:
+ case IR::Opcode::LoadGlobal128:
+ return ReplaceLoad(block, inst, storage_index, offset);
+ case IR::Opcode::WriteGlobalS8:
+ case IR::Opcode::WriteGlobalU8:
+ case IR::Opcode::WriteGlobalS16:
+ case IR::Opcode::WriteGlobalU16:
+ case IR::Opcode::WriteGlobal32:
+ case IR::Opcode::WriteGlobal64:
+ case IR::Opcode::WriteGlobal128:
+ return ReplaceWrite(block, inst, storage_index, offset);
+ case IR::Opcode::GlobalAtomicIAdd32:
+ case IR::Opcode::GlobalAtomicSMin32:
+ case IR::Opcode::GlobalAtomicUMin32:
+ case IR::Opcode::GlobalAtomicSMax32:
+ case IR::Opcode::GlobalAtomicUMax32:
+ case IR::Opcode::GlobalAtomicInc32:
+ case IR::Opcode::GlobalAtomicDec32:
+ case IR::Opcode::GlobalAtomicAnd32:
+ case IR::Opcode::GlobalAtomicOr32:
+ case IR::Opcode::GlobalAtomicXor32:
+ case IR::Opcode::GlobalAtomicExchange32:
+ case IR::Opcode::GlobalAtomicIAdd64:
+ case IR::Opcode::GlobalAtomicSMin64:
+ case IR::Opcode::GlobalAtomicUMin64:
+ case IR::Opcode::GlobalAtomicSMax64:
+ case IR::Opcode::GlobalAtomicUMax64:
+ case IR::Opcode::GlobalAtomicAnd64:
+ case IR::Opcode::GlobalAtomicOr64:
+ case IR::Opcode::GlobalAtomicXor64:
+ case IR::Opcode::GlobalAtomicExchange64:
+ case IR::Opcode::GlobalAtomicAddF32:
+ case IR::Opcode::GlobalAtomicAddF16x2:
+ case IR::Opcode::GlobalAtomicAddF32x2:
+ case IR::Opcode::GlobalAtomicMinF16x2:
+ case IR::Opcode::GlobalAtomicMinF32x2:
+ case IR::Opcode::GlobalAtomicMaxF16x2:
+ case IR::Opcode::GlobalAtomicMaxF32x2:
+ return ReplaceAtomic(block, inst, storage_index, offset);
+ default:
+ throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode());
+ }
+}
+} // Anonymous namespace
+
+void GlobalMemoryToStorageBufferPass(IR::Program& program) {
+ StorageInfo info;
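+    // First, collect every global memory access and the storage buffer address it resolves to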
+ for (IR::Block* const block : program.post_order_blocks) {
+ for (IR::Inst& inst : block->Instructions()) {
+ if (!IsGlobalMemory(inst)) {
+ continue;
+ }
+ CollectStorageBuffers(*block, inst, info);
+ }
+ }
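+    // Then emit one storage buffer descriptor per unique (constant buffer index, offset) pair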
+ for (const StorageBufferAddr& storage_buffer : info.set) {
+ program.info.storage_buffers_descriptors.push_back({
+ .cbuf_index = storage_buffer.index,
+ .cbuf_offset = storage_buffer.offset,
+ .count = 1,
+ .is_written = info.writes.contains(storage_buffer),
+ });
+ }
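+    // Finally, rewrite each collected instruction to use its storage buffer index and byte offset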
+ for (const StorageInst& storage_inst : info.to_replace) {
+ const StorageBufferAddr storage_buffer{storage_inst.storage_buffer};
+ const auto it{info.set.find(storage_inst.storage_buffer)};
+ const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}};
+ IR::Block* const block{storage_inst.block};
+ IR::Inst* const inst{storage_inst.inst};
+ const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)};
+ Replace(*block, *inst, index, offset);
+ }
+}
+
+template <typename Descriptors, typename Descriptor, typename Func>
+static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) {
+ // TODO: Handle arrays
+ const auto it{std::ranges::find_if(descriptors, pred)};
+ if (it != descriptors.end()) {
+ return static_cast<u32>(std::distance(descriptors.begin(), it));
+ }
+ descriptors.push_back(desc);
+ return static_cast<u32>(descriptors.size()) - 1;
+}
+
+void JoinStorageInfo(Info& base, Info& source) {
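+    // Merge the storage buffer descriptors of 'source' into 'base', reusing matching entries and
+    // propagating the is_written flag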
+ auto& descriptors = base.storage_buffers_descriptors;
+ for (auto& desc : source.storage_buffers_descriptors) {
+ auto it{std::ranges::find_if(descriptors, [&desc](const auto& existing) {
+ return desc.cbuf_index == existing.cbuf_index &&
+ desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count;
+ })};
+ if (it != descriptors.end()) {
+ it->is_written |= desc.is_written;
+ continue;
+ }
+ descriptors.push_back(desc);
+ }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
new file mode 100644
index 000000000..e9b55f835
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
@@ -0,0 +1,38 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <vector>
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+
+void IdentityRemovalPass(IR::Program& program) {
+ std::vector<IR::Inst*> to_invalidate;
+ for (IR::Block* const block : program.blocks) {
+ for (auto inst = block->begin(); inst != block->end();) {
+ const size_t num_args{inst->NumArgs()};
+ for (size_t i = 0; i < num_args; ++i) {
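+                // Collapse chains of Identity instructions so each argument points at the real value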
+ IR::Value arg;
+ while ((arg = inst->Arg(i)).IsIdentity()) {
+ inst->SetArg(i, arg.Inst()->Arg(0));
+ }
+ }
+ if (inst->GetOpcode() == IR::Opcode::Identity ||
+ inst->GetOpcode() == IR::Opcode::Void) {
+ to_invalidate.push_back(&*inst);
+ inst = block->Instructions().erase(inst);
+ } else {
+ ++inst;
+ }
+ }
+ }
+ for (IR::Inst* const inst : to_invalidate) {
+ inst->Invalidate();
+ }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
new file mode 100644
index 000000000..773e1f961
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
@@ -0,0 +1,143 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+namespace {
+IR::Opcode Replace(IR::Opcode op) {
+ switch (op) {
+ case IR::Opcode::FPAbs16:
+ return IR::Opcode::FPAbs32;
+ case IR::Opcode::FPAdd16:
+ return IR::Opcode::FPAdd32;
+ case IR::Opcode::FPCeil16:
+ return IR::Opcode::FPCeil32;
+ case IR::Opcode::FPFloor16:
+ return IR::Opcode::FPFloor32;
+ case IR::Opcode::FPFma16:
+ return IR::Opcode::FPFma32;
+ case IR::Opcode::FPMul16:
+ return IR::Opcode::FPMul32;
+ case IR::Opcode::FPNeg16:
+ return IR::Opcode::FPNeg32;
+ case IR::Opcode::FPRoundEven16:
+ return IR::Opcode::FPRoundEven32;
+ case IR::Opcode::FPSaturate16:
+ return IR::Opcode::FPSaturate32;
+ case IR::Opcode::FPClamp16:
+ return IR::Opcode::FPClamp32;
+ case IR::Opcode::FPTrunc16:
+ return IR::Opcode::FPTrunc32;
+ case IR::Opcode::CompositeConstructF16x2:
+ return IR::Opcode::CompositeConstructF32x2;
+ case IR::Opcode::CompositeConstructF16x3:
+ return IR::Opcode::CompositeConstructF32x3;
+ case IR::Opcode::CompositeConstructF16x4:
+ return IR::Opcode::CompositeConstructF32x4;
+ case IR::Opcode::CompositeExtractF16x2:
+ return IR::Opcode::CompositeExtractF32x2;
+ case IR::Opcode::CompositeExtractF16x3:
+ return IR::Opcode::CompositeExtractF32x3;
+ case IR::Opcode::CompositeExtractF16x4:
+ return IR::Opcode::CompositeExtractF32x4;
+ case IR::Opcode::CompositeInsertF16x2:
+ return IR::Opcode::CompositeInsertF32x2;
+ case IR::Opcode::CompositeInsertF16x3:
+ return IR::Opcode::CompositeInsertF32x3;
+ case IR::Opcode::CompositeInsertF16x4:
+ return IR::Opcode::CompositeInsertF32x4;
+ case IR::Opcode::FPOrdEqual16:
+ return IR::Opcode::FPOrdEqual32;
+ case IR::Opcode::FPUnordEqual16:
+ return IR::Opcode::FPUnordEqual32;
+ case IR::Opcode::FPOrdNotEqual16:
+ return IR::Opcode::FPOrdNotEqual32;
+ case IR::Opcode::FPUnordNotEqual16:
+ return IR::Opcode::FPUnordNotEqual32;
+ case IR::Opcode::FPOrdLessThan16:
+ return IR::Opcode::FPOrdLessThan32;
+ case IR::Opcode::FPUnordLessThan16:
+ return IR::Opcode::FPUnordLessThan32;
+ case IR::Opcode::FPOrdGreaterThan16:
+ return IR::Opcode::FPOrdGreaterThan32;
+ case IR::Opcode::FPUnordGreaterThan16:
+ return IR::Opcode::FPUnordGreaterThan32;
+ case IR::Opcode::FPOrdLessThanEqual16:
+ return IR::Opcode::FPOrdLessThanEqual32;
+ case IR::Opcode::FPUnordLessThanEqual16:
+ return IR::Opcode::FPUnordLessThanEqual32;
+ case IR::Opcode::FPOrdGreaterThanEqual16:
+ return IR::Opcode::FPOrdGreaterThanEqual32;
+ case IR::Opcode::FPUnordGreaterThanEqual16:
+ return IR::Opcode::FPUnordGreaterThanEqual32;
+ case IR::Opcode::FPIsNan16:
+ return IR::Opcode::FPIsNan32;
+ case IR::Opcode::ConvertS16F16:
+ return IR::Opcode::ConvertS16F32;
+ case IR::Opcode::ConvertS32F16:
+ return IR::Opcode::ConvertS32F32;
+ case IR::Opcode::ConvertS64F16:
+ return IR::Opcode::ConvertS64F32;
+ case IR::Opcode::ConvertU16F16:
+ return IR::Opcode::ConvertU16F32;
+ case IR::Opcode::ConvertU32F16:
+ return IR::Opcode::ConvertU32F32;
+ case IR::Opcode::ConvertU64F16:
+ return IR::Opcode::ConvertU64F32;
+ case IR::Opcode::PackFloat2x16:
+ return IR::Opcode::PackHalf2x16;
+ case IR::Opcode::UnpackFloat2x16:
+ return IR::Opcode::UnpackHalf2x16;
+ case IR::Opcode::ConvertF32F16:
+ return IR::Opcode::Identity;
+ case IR::Opcode::ConvertF16F32:
+ return IR::Opcode::Identity;
+ case IR::Opcode::ConvertF16S8:
+ return IR::Opcode::ConvertF32S8;
+ case IR::Opcode::ConvertF16S16:
+ return IR::Opcode::ConvertF32S16;
+ case IR::Opcode::ConvertF16S32:
+ return IR::Opcode::ConvertF32S32;
+ case IR::Opcode::ConvertF16S64:
+ return IR::Opcode::ConvertF32S64;
+ case IR::Opcode::ConvertF16U8:
+ return IR::Opcode::ConvertF32U8;
+ case IR::Opcode::ConvertF16U16:
+ return IR::Opcode::ConvertF32U16;
+ case IR::Opcode::ConvertF16U32:
+ return IR::Opcode::ConvertF32U32;
+ case IR::Opcode::ConvertF16U64:
+ return IR::Opcode::ConvertF32U64;
+ case IR::Opcode::GlobalAtomicAddF16x2:
+ return IR::Opcode::GlobalAtomicAddF32x2;
+ case IR::Opcode::StorageAtomicAddF16x2:
+ return IR::Opcode::StorageAtomicAddF32x2;
+ case IR::Opcode::GlobalAtomicMinF16x2:
+ return IR::Opcode::GlobalAtomicMinF32x2;
+ case IR::Opcode::StorageAtomicMinF16x2:
+ return IR::Opcode::StorageAtomicMinF32x2;
+ case IR::Opcode::GlobalAtomicMaxF16x2:
+ return IR::Opcode::GlobalAtomicMaxF32x2;
+ case IR::Opcode::StorageAtomicMaxF16x2:
+ return IR::Opcode::StorageAtomicMaxF32x2;
+ default:
+ return op;
+ }
+}
+} // Anonymous namespace
+
+void LowerFp16ToFp32(IR::Program& program) {
+ for (IR::Block* const block : program.blocks) {
+ for (IR::Inst& inst : block->Instructions()) {
+ inst.ReplaceOpcode(Replace(inst.GetOpcode()));
+ }
+ }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp
new file mode 100644
index 000000000..e80d3d1d9
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp
@@ -0,0 +1,218 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <utility>
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+namespace {
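+// Splits a 64-bit value, given either as an immediate or as a two-component vector, into its low
+// and high 32-bit halves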
+std::pair<IR::U32, IR::U32> Unpack(IR::IREmitter& ir, const IR::Value& packed) {
+ if (packed.IsImmediate()) {
+ const u64 value{packed.U64()};
+ return {
+ ir.Imm32(static_cast<u32>(value)),
+ ir.Imm32(static_cast<u32>(value >> 32)),
+ };
+ } else {
+ return std::pair<IR::U32, IR::U32>{
+ ir.CompositeExtract(packed, 0u),
+ ir.CompositeExtract(packed, 1u),
+ };
+ }
+}
+
+void IAdd64To32(IR::Block& block, IR::Inst& inst) {
+ if (inst.HasAssociatedPseudoOperation()) {
+ throw NotImplementedException("IAdd64 emulation with pseudo instructions");
+ }
+ IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+ const auto [a_lo, a_hi]{Unpack(ir, inst.Arg(0))};
+ const auto [b_lo, b_hi]{Unpack(ir, inst.Arg(1))};
+
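+    // Add the low halves first, then propagate the carry into the sum of the high halves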
+ const IR::U32 ret_lo{ir.IAdd(a_lo, b_lo)};
+ const IR::U32 carry{ir.Select(ir.GetCarryFromOp(ret_lo), ir.Imm32(1u), ir.Imm32(0u))};
+
+ const IR::U32 ret_hi{ir.IAdd(ir.IAdd(a_hi, b_hi), carry)};
+ inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
+}
+
+void ISub64To32(IR::Block& block, IR::Inst& inst) {
+ if (inst.HasAssociatedPseudoOperation()) {
+ throw NotImplementedException("ISub64 emulation with pseudo instructions");
+ }
+ IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+ const auto [a_lo, a_hi]{Unpack(ir, inst.Arg(0))};
+ const auto [b_lo, b_hi]{Unpack(ir, inst.Arg(1))};
+
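+    // Subtract the low halves; an unsigned result greater than a_lo means the subtraction borrowed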
+ const IR::U32 ret_lo{ir.ISub(a_lo, b_lo)};
+ const IR::U1 underflow{ir.IGreaterThan(ret_lo, a_lo, false)};
+ const IR::U32 underflow_bit{ir.Select(underflow, ir.Imm32(1u), ir.Imm32(0u))};
+
+ const IR::U32 ret_hi{ir.ISub(ir.ISub(a_hi, b_hi), underflow_bit)};
+ inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
+}
+
+void INeg64To32(IR::Block& block, IR::Inst& inst) {
+ if (inst.HasAssociatedPseudoOperation()) {
+ throw NotImplementedException("INeg64 emulation with pseudo instructions");
+ }
+ IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+ auto [lo, hi]{Unpack(ir, inst.Arg(0))};
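+    // Two's complement negation: invert both halves, then add one with carry propagation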
+ lo = ir.BitwiseNot(lo);
+ hi = ir.BitwiseNot(hi);
+
+ lo = ir.IAdd(lo, ir.Imm32(1));
+
+ const IR::U32 carry{ir.Select(ir.GetCarryFromOp(lo), ir.Imm32(1u), ir.Imm32(0u))};
+ hi = ir.IAdd(hi, carry);
+
+ inst.ReplaceUsesWith(ir.CompositeConstruct(lo, hi));
+}
+
+void ShiftLeftLogical64To32(IR::Block& block, IR::Inst& inst) {
+ if (inst.HasAssociatedPseudoOperation()) {
+ throw NotImplementedException("ShiftLeftLogical64 emulation with pseudo instructions");
+ }
+ IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+ const auto [lo, hi]{Unpack(ir, inst.Arg(0))};
+ const IR::U32 shift{inst.Arg(1)};
+
+ const IR::U32 shifted_lo{ir.ShiftLeftLogical(lo, shift)};
+ const IR::U32 shifted_hi{ir.ShiftLeftLogical(hi, shift)};
+
+ const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))};
+ const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)};
+ const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))};
+
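+    // Three cases: shift == 0 passes the value through, shift >= 32 moves the low half entirely
+    // into the high half, and 0 < shift < 32 spills the top bits of the low half into the high half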
+ const IR::U32 long_ret_lo{ir.Imm32(0)};
+ const IR::U32 long_ret_hi{ir.ShiftLeftLogical(lo, inv_shift)};
+
+ const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)};
+ const IR::U32 lo_extract{ir.BitFieldExtract(lo, shift_complement, shift, false)};
+ const IR::U32 short_ret_lo{shifted_lo};
+ const IR::U32 short_ret_hi{ir.BitwiseOr(shifted_hi, lo_extract)};
+
+ const IR::U32 zero_ret_lo{lo};
+ const IR::U32 zero_ret_hi{hi};
+
+ const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)};
+ const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)};
+
+ const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)};
+ const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)};
+ inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
+}
+
+void ShiftRightLogical64To32(IR::Block& block, IR::Inst& inst) {
+ if (inst.HasAssociatedPseudoOperation()) {
+ throw NotImplementedException("ShiftRightLogical64 emulation with pseudo instructions");
+ }
+ IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+ const auto [lo, hi]{Unpack(ir, inst.Arg(0))};
+ const IR::U32 shift{inst.Arg(1)};
+
+ const IR::U32 shifted_lo{ir.ShiftRightLogical(lo, shift)};
+ const IR::U32 shifted_hi{ir.ShiftRightLogical(hi, shift)};
+
+ const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))};
+ const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)};
+ const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))};
+
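+    // Mirror of the left shift: shift >= 32 moves the high half entirely into the low half, while
+    // 0 < shift < 32 spills the bottom bits of the high half into the low half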
+ const IR::U32 long_ret_hi{ir.Imm32(0)};
+ const IR::U32 long_ret_lo{ir.ShiftRightLogical(hi, inv_shift)};
+
+ const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)};
+ const IR::U32 short_hi_extract{ir.BitFieldExtract(hi, ir.Imm32(0), shift)};
+ const IR::U32 short_ret_hi{shifted_hi};
+ const IR::U32 short_ret_lo{
+ ir.BitFieldInsert(shifted_lo, short_hi_extract, shift_complement, shift)};
+
+ const IR::U32 zero_ret_lo{lo};
+ const IR::U32 zero_ret_hi{hi};
+
+ const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)};
+ const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)};
+
+ const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)};
+ const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)};
+ inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
+}
+
+void ShiftRightArithmetic64To32(IR::Block& block, IR::Inst& inst) {
+ if (inst.HasAssociatedPseudoOperation()) {
+ throw NotImplementedException("ShiftRightArithmetic64 emulation with pseudo instructions");
+ }
+ IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+ const auto [lo, hi]{Unpack(ir, inst.Arg(0))};
+ const IR::U32 shift{inst.Arg(1)};
+
+ const IR::U32 shifted_lo{ir.ShiftRightLogical(lo, shift)};
+ const IR::U32 shifted_hi{ir.ShiftRightArithmetic(hi, shift)};
+
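+    // hi >> 31 (arithmetic) yields all ones or zeroes, filling the high half for shifts >= 32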
+ const IR::U32 sign_extension{ir.ShiftRightArithmetic(hi, ir.Imm32(31))};
+
+ const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))};
+ const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)};
+ const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))};
+
+ const IR::U32 long_ret_hi{sign_extension};
+ const IR::U32 long_ret_lo{ir.ShiftRightArithmetic(hi, inv_shift)};
+
+ const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)};
+    const IR::U32 short_hi_extract{ir.BitFieldExtract(hi, ir.Imm32(0), shift)};
+ const IR::U32 short_ret_hi{shifted_hi};
+ const IR::U32 short_ret_lo{
+ ir.BitFieldInsert(shifted_lo, short_hi_extract, shift_complement, shift)};
+
+ const IR::U32 zero_ret_lo{lo};
+ const IR::U32 zero_ret_hi{hi};
+
+ const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)};
+ const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)};
+
+ const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)};
+ const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)};
+ inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
+}
+
+void Lower(IR::Block& block, IR::Inst& inst) {
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::PackUint2x32:
+ case IR::Opcode::UnpackUint2x32:
+ return inst.ReplaceOpcode(IR::Opcode::Identity);
+ case IR::Opcode::IAdd64:
+ return IAdd64To32(block, inst);
+ case IR::Opcode::ISub64:
+ return ISub64To32(block, inst);
+ case IR::Opcode::INeg64:
+ return INeg64To32(block, inst);
+ case IR::Opcode::ShiftLeftLogical64:
+ return ShiftLeftLogical64To32(block, inst);
+ case IR::Opcode::ShiftRightLogical64:
+ return ShiftRightLogical64To32(block, inst);
+ case IR::Opcode::ShiftRightArithmetic64:
+ return ShiftRightArithmetic64To32(block, inst);
+ default:
+ break;
+ }
+}
+} // Anonymous namespace
+
+void LowerInt64ToInt32(IR::Program& program) {
+ const auto end{program.post_order_blocks.rend()};
+ for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) {
+ IR::Block* const block{*it};
+ for (IR::Inst& inst : block->Instructions()) {
+ Lower(*block, inst);
+ }
+ }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
new file mode 100644
index 000000000..2f89b1ea0
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -0,0 +1,32 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <span>
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/program.h"
+
+namespace Shader::Optimization {
+
+void CollectShaderInfoPass(Environment& env, IR::Program& program);
+void ConstantPropagationPass(IR::Program& program);
+void DeadCodeEliminationPass(IR::Program& program);
+void GlobalMemoryToStorageBufferPass(IR::Program& program);
+void IdentityRemovalPass(IR::Program& program);
+void LowerFp16ToFp32(IR::Program& program);
+void LowerInt64ToInt32(IR::Program& program);
+void SsaRewritePass(IR::Program& program);
+void TexturePass(Environment& env, IR::Program& program);
+void VerificationPass(const IR::Program& program);
+
+// Dual Vertex
+void VertexATransformPass(IR::Program& program);
+void VertexBTransformPass(IR::Program& program);
+void JoinTextureInfo(Info& base, Info& source);
+void JoinStorageInfo(Info& base, Info& source);
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
new file mode 100644
index 000000000..53145fb5e
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
@@ -0,0 +1,383 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+// This file implements the SSA rewriting algorithm proposed in
+//
+// Simple and Efficient Construction of Static Single Assignment Form.
+// Braun M., Buchwald S., Hack S., Leißa R., Mallon C., Zwinkau A. (2013)
+// In: Jhala R., De Bosschere K. (eds)
+// Compiler Construction. CC 2013.
+// Lecture Notes in Computer Science, vol 7791.
+// Springer, Berlin, Heidelberg
+//
+// https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6
+//
+
+#include <span>
+#include <variant>
+#include <vector>
+
+#include <boost/container/flat_map.hpp>
+#include <boost/container/flat_set.hpp>
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/opcodes.h"
+#include "shader_recompiler/frontend/ir/pred.h"
+#include "shader_recompiler/frontend/ir/reg.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+namespace {
+struct FlagTag {
+ auto operator<=>(const FlagTag&) const noexcept = default;
+};
+struct ZeroFlagTag : FlagTag {};
+struct SignFlagTag : FlagTag {};
+struct CarryFlagTag : FlagTag {};
+struct OverflowFlagTag : FlagTag {};
+
+struct GotoVariable : FlagTag {
+ GotoVariable() = default;
+ explicit GotoVariable(u32 index_) : index{index_} {}
+
+ auto operator<=>(const GotoVariable&) const noexcept = default;
+
+ u32 index;
+};
+
+struct IndirectBranchVariable {
+ auto operator<=>(const IndirectBranchVariable&) const noexcept = default;
+};
+
+using Variant = std::variant<IR::Reg, IR::Pred, ZeroFlagTag, SignFlagTag, CarryFlagTag,
+ OverflowFlagTag, GotoVariable, IndirectBranchVariable>;
+using ValueMap = boost::container::flat_map<IR::Block*, IR::Value>;
+
+struct DefTable {
+ const IR::Value& Def(IR::Block* block, IR::Reg variable) {
+ return block->SsaRegValue(variable);
+ }
+ void SetDef(IR::Block* block, IR::Reg variable, const IR::Value& value) {
+ block->SetSsaRegValue(variable, value);
+ }
+
+ const IR::Value& Def(IR::Block* block, IR::Pred variable) {
+ return preds[IR::PredIndex(variable)][block];
+ }
+ void SetDef(IR::Block* block, IR::Pred variable, const IR::Value& value) {
+ preds[IR::PredIndex(variable)].insert_or_assign(block, value);
+ }
+
+ const IR::Value& Def(IR::Block* block, GotoVariable variable) {
+ return goto_vars[variable.index][block];
+ }
+ void SetDef(IR::Block* block, GotoVariable variable, const IR::Value& value) {
+ goto_vars[variable.index].insert_or_assign(block, value);
+ }
+
+ const IR::Value& Def(IR::Block* block, IndirectBranchVariable) {
+ return indirect_branch_var[block];
+ }
+ void SetDef(IR::Block* block, IndirectBranchVariable, const IR::Value& value) {
+ indirect_branch_var.insert_or_assign(block, value);
+ }
+
+ const IR::Value& Def(IR::Block* block, ZeroFlagTag) {
+ return zero_flag[block];
+ }
+ void SetDef(IR::Block* block, ZeroFlagTag, const IR::Value& value) {
+ zero_flag.insert_or_assign(block, value);
+ }
+
+ const IR::Value& Def(IR::Block* block, SignFlagTag) {
+ return sign_flag[block];
+ }
+ void SetDef(IR::Block* block, SignFlagTag, const IR::Value& value) {
+ sign_flag.insert_or_assign(block, value);
+ }
+
+ const IR::Value& Def(IR::Block* block, CarryFlagTag) {
+ return carry_flag[block];
+ }
+ void SetDef(IR::Block* block, CarryFlagTag, const IR::Value& value) {
+ carry_flag.insert_or_assign(block, value);
+ }
+
+ const IR::Value& Def(IR::Block* block, OverflowFlagTag) {
+ return overflow_flag[block];
+ }
+ void SetDef(IR::Block* block, OverflowFlagTag, const IR::Value& value) {
+ overflow_flag.insert_or_assign(block, value);
+ }
+
+ std::array<ValueMap, IR::NUM_USER_PREDS> preds;
+ boost::container::flat_map<u32, ValueMap> goto_vars;
+ ValueMap indirect_branch_var;
+ ValueMap zero_flag;
+ ValueMap sign_flag;
+ ValueMap carry_flag;
+ ValueMap overflow_flag;
+};
+
+IR::Opcode UndefOpcode(IR::Reg) noexcept {
+ return IR::Opcode::UndefU32;
+}
+
+IR::Opcode UndefOpcode(IR::Pred) noexcept {
+ return IR::Opcode::UndefU1;
+}
+
+IR::Opcode UndefOpcode(const FlagTag&) noexcept {
+ return IR::Opcode::UndefU1;
+}
+
+IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept {
+ return IR::Opcode::UndefU32;
+}
+
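+// ReadVariable below is written as an explicit state machine rather than the
+// recursive formulation from the paper: ReadState frames are kept on a small
+// vector used as a stack, and Status records where to resume when a frame is
+// revisited, avoiding deep native recursion on long predecessor chains.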
+enum class Status {
+ Start,
+ SetValue,
+ PreparePhiArgument,
+ PushPhiArgument,
+};
+
+template <typename Type>
+struct ReadState {
+ ReadState(IR::Block* block_) : block{block_} {}
+ ReadState() = default;
+
+ IR::Block* block{};
+ IR::Value result{};
+ IR::Inst* phi{};
+ IR::Block* const* pred_it{};
+ IR::Block* const* pred_end{};
+ Status pc{Status::Start};
+};
+
+class Pass {
+public:
+ template <typename Type>
+ void WriteVariable(Type variable, IR::Block* block, const IR::Value& value) {
+ current_def.SetDef(block, variable, value);
+ }
+
+ template <typename Type>
+ IR::Value ReadVariable(Type variable, IR::Block* root_block) {
+ boost::container::small_vector<ReadState<Type>, 64> stack{
+ ReadState<Type>(nullptr),
+ ReadState<Type>(root_block),
+ };
+ const auto prepare_phi_operand{[&] {
+ if (stack.back().pred_it == stack.back().pred_end) {
+ IR::Inst* const phi{stack.back().phi};
+ IR::Block* const block{stack.back().block};
+ const IR::Value result{TryRemoveTrivialPhi(*phi, block, UndefOpcode(variable))};
+ stack.pop_back();
+ stack.back().result = result;
+ WriteVariable(variable, block, result);
+ } else {
+ IR::Block* const imm_pred{*stack.back().pred_it};
+ stack.back().pc = Status::PushPhiArgument;
+ stack.emplace_back(imm_pred);
+ }
+ }};
+ do {
+ IR::Block* const block{stack.back().block};
+ switch (stack.back().pc) {
+ case Status::Start: {
+ if (const IR::Value& def = current_def.Def(block, variable); !def.IsEmpty()) {
+ stack.back().result = def;
+ } else if (!block->IsSsaSealed()) {
+ // Incomplete CFG
+ IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
+ phi->SetFlags(IR::TypeOf(UndefOpcode(variable)));
+
+ incomplete_phis[block].insert_or_assign(variable, phi);
+ stack.back().result = IR::Value{&*phi};
+ } else if (const std::span imm_preds = block->ImmPredecessors();
+ imm_preds.size() == 1) {
+ // Optimize the common case of one predecessor: no phi needed
+ stack.back().pc = Status::SetValue;
+ stack.emplace_back(imm_preds.front());
+ break;
+ } else {
+ // Break potential cycles with operandless phi
+ IR::Inst* const phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
+ phi->SetFlags(IR::TypeOf(UndefOpcode(variable)));
+
+ WriteVariable(variable, block, IR::Value{phi});
+
+ stack.back().phi = phi;
+ stack.back().pred_it = imm_preds.data();
+ stack.back().pred_end = imm_preds.data() + imm_preds.size();
+ prepare_phi_operand();
+ break;
+ }
+ }
+ [[fallthrough]];
+ case Status::SetValue: {
+ const IR::Value result{stack.back().result};
+ WriteVariable(variable, block, result);
+ stack.pop_back();
+ stack.back().result = result;
+ break;
+ }
+ case Status::PushPhiArgument: {
+ IR::Inst* const phi{stack.back().phi};
+ phi->AddPhiOperand(*stack.back().pred_it, stack.back().result);
+ ++stack.back().pred_it;
+ }
+ [[fallthrough]];
+ case Status::PreparePhiArgument:
+ prepare_phi_operand();
+ break;
+ }
+ } while (stack.size() > 1);
+ return stack.back().result;
+ }
+
+ void SealBlock(IR::Block* block) {
+ const auto it{incomplete_phis.find(block)};
+ if (it != incomplete_phis.end()) {
+ for (auto& pair : it->second) {
+ auto& variant{pair.first};
+ auto& phi{pair.second};
+ std::visit([&](auto& variable) { AddPhiOperands(variable, *phi, block); }, variant);
+ }
+ }
+ block->SsaSeal();
+ }
+
+private:
+ template <typename Type>
+ IR::Value AddPhiOperands(Type variable, IR::Inst& phi, IR::Block* block) {
+ for (IR::Block* const imm_pred : block->ImmPredecessors()) {
+ phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred));
+ }
+ return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable));
+ }
+
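+    // A phi is trivial when every operand resolves to the same value or to
+    // the phi itself. Trivial phis are replaced with that value; a phi with
+    // no meaningful operand (unreachable code or the entry block) is replaced
+    // with a freshly inserted Undef instruction.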
+ IR::Value TryRemoveTrivialPhi(IR::Inst& phi, IR::Block* block, IR::Opcode undef_opcode) {
+ IR::Value same;
+ const size_t num_args{phi.NumArgs()};
+ for (size_t arg_index = 0; arg_index < num_args; ++arg_index) {
+ const IR::Value& op{phi.Arg(arg_index)};
+ if (op.Resolve() == same.Resolve() || op == IR::Value{&phi}) {
+ // Unique value or self-reference
+ continue;
+ }
+ if (!same.IsEmpty()) {
+ // The phi merges at least two values: not trivial
+ return IR::Value{&phi};
+ }
+ same = op;
+ }
+        // Remove the phi node from the block; it will be reinserted
+ IR::Block::InstructionList& list{block->Instructions()};
+ list.erase(IR::Block::InstructionList::s_iterator_to(phi));
+
+ // Find the first non-phi instruction and use it as an insertion point
+ IR::Block::iterator reinsert_point{std::ranges::find_if_not(list, IR::IsPhi)};
+ if (same.IsEmpty()) {
+ // The phi is unreachable or in the start block
+ // Insert an undefined instruction and make it the phi node replacement
+ // The "phi" node reinsertion point is specified after this instruction
+ reinsert_point = block->PrependNewInst(reinsert_point, undef_opcode);
+ same = IR::Value{&*reinsert_point};
+ ++reinsert_point;
+ }
+ // Reinsert the phi node and reroute all its uses to the "same" value
+ list.insert(reinsert_point, phi);
+ phi.ReplaceUsesWith(same);
+ // TODO: Try to recursively remove all phi users, which might have become trivial
+ return same;
+ }
+
+ boost::container::flat_map<IR::Block*, boost::container::flat_map<Variant, IR::Inst*>>
+ incomplete_phis;
+ DefTable current_def;
+};
+
+void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::SetRegister:
+ if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) {
+ pass.WriteVariable(reg, block, inst.Arg(1));
+ }
+ break;
+ case IR::Opcode::SetPred:
+ if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) {
+ pass.WriteVariable(pred, block, inst.Arg(1));
+ }
+ break;
+ case IR::Opcode::SetGotoVariable:
+ pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1));
+ break;
+ case IR::Opcode::SetIndirectBranchVariable:
+ pass.WriteVariable(IndirectBranchVariable{}, block, inst.Arg(0));
+ break;
+ case IR::Opcode::SetZFlag:
+ pass.WriteVariable(ZeroFlagTag{}, block, inst.Arg(0));
+ break;
+ case IR::Opcode::SetSFlag:
+ pass.WriteVariable(SignFlagTag{}, block, inst.Arg(0));
+ break;
+ case IR::Opcode::SetCFlag:
+ pass.WriteVariable(CarryFlagTag{}, block, inst.Arg(0));
+ break;
+ case IR::Opcode::SetOFlag:
+ pass.WriteVariable(OverflowFlagTag{}, block, inst.Arg(0));
+ break;
+ case IR::Opcode::GetRegister:
+ if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) {
+ inst.ReplaceUsesWith(pass.ReadVariable(reg, block));
+ }
+ break;
+ case IR::Opcode::GetPred:
+ if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) {
+ inst.ReplaceUsesWith(pass.ReadVariable(pred, block));
+ }
+ break;
+ case IR::Opcode::GetGotoVariable:
+ inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block));
+ break;
+ case IR::Opcode::GetIndirectBranchVariable:
+ inst.ReplaceUsesWith(pass.ReadVariable(IndirectBranchVariable{}, block));
+ break;
+ case IR::Opcode::GetZFlag:
+ inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block));
+ break;
+ case IR::Opcode::GetSFlag:
+ inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block));
+ break;
+ case IR::Opcode::GetCFlag:
+ inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block));
+ break;
+ case IR::Opcode::GetOFlag:
+ inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block));
+ break;
+ default:
+ break;
+ }
+}
+
+void VisitBlock(Pass& pass, IR::Block* block) {
+ for (IR::Inst& inst : block->Instructions()) {
+ VisitInst(pass, block, inst);
+ }
+ pass.SealBlock(block);
+}
+} // Anonymous namespace
+
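+// Visiting blocks in reverse post order means that, except across back edges,
+// a block's predecessors are processed and sealed before the block itself,
+// which keeps the number of incomplete phis small.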
+void SsaRewritePass(IR::Program& program) {
+ Pass pass;
+ const auto end{program.post_order_blocks.rend()};
+ for (auto block = program.post_order_blocks.rbegin(); block != end; ++block) {
+ VisitBlock(pass, *block);
+ }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp
new file mode 100644
index 000000000..44ad10d43
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/texture_pass.cpp
@@ -0,0 +1,523 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <bit>
+#include <optional>
+
+#include <boost/container/small_vector.hpp>
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/breadth_first_search.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/ir_opt/passes.h"
+#include "shader_recompiler/shader_info.h"
+
+namespace Shader::Optimization {
+namespace {
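+// Location of a texture handle inside the guest constant buffers. When
+// has_secondary is set, the handle is the bitwise OR of two constant buffer
+// reads (separate sampler and texture handles); when count is greater than
+// one, the handle is indexed dynamically through dynamic_offset.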
+struct ConstBufferAddr {
+ u32 index;
+ u32 offset;
+ u32 secondary_index;
+ u32 secondary_offset;
+ IR::U32 dynamic_offset;
+ u32 count;
+ bool has_secondary;
+};
+
+struct TextureInst {
+ ConstBufferAddr cbuf;
+ IR::Inst* inst;
+ IR::Block* block;
+};
+
+using TextureInstVector = boost::container::small_vector<TextureInst, 24>;
+
+constexpr u32 DESCRIPTOR_SIZE = 8;
+constexpr u32 DESCRIPTOR_SIZE_SHIFT = static_cast<u32>(std::countr_zero(DESCRIPTOR_SIZE));
+
+IR::Opcode IndexedInstruction(const IR::Inst& inst) {
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::BindlessImageSampleImplicitLod:
+ case IR::Opcode::BoundImageSampleImplicitLod:
+ return IR::Opcode::ImageSampleImplicitLod;
+ case IR::Opcode::BoundImageSampleExplicitLod:
+ case IR::Opcode::BindlessImageSampleExplicitLod:
+ return IR::Opcode::ImageSampleExplicitLod;
+ case IR::Opcode::BoundImageSampleDrefImplicitLod:
+ case IR::Opcode::BindlessImageSampleDrefImplicitLod:
+ return IR::Opcode::ImageSampleDrefImplicitLod;
+ case IR::Opcode::BoundImageSampleDrefExplicitLod:
+ case IR::Opcode::BindlessImageSampleDrefExplicitLod:
+ return IR::Opcode::ImageSampleDrefExplicitLod;
+ case IR::Opcode::BindlessImageGather:
+ case IR::Opcode::BoundImageGather:
+ return IR::Opcode::ImageGather;
+ case IR::Opcode::BindlessImageGatherDref:
+ case IR::Opcode::BoundImageGatherDref:
+ return IR::Opcode::ImageGatherDref;
+ case IR::Opcode::BindlessImageFetch:
+ case IR::Opcode::BoundImageFetch:
+ return IR::Opcode::ImageFetch;
+ case IR::Opcode::BoundImageQueryDimensions:
+ case IR::Opcode::BindlessImageQueryDimensions:
+ return IR::Opcode::ImageQueryDimensions;
+ case IR::Opcode::BoundImageQueryLod:
+ case IR::Opcode::BindlessImageQueryLod:
+ return IR::Opcode::ImageQueryLod;
+ case IR::Opcode::BoundImageGradient:
+ case IR::Opcode::BindlessImageGradient:
+ return IR::Opcode::ImageGradient;
+ case IR::Opcode::BoundImageRead:
+ case IR::Opcode::BindlessImageRead:
+ return IR::Opcode::ImageRead;
+ case IR::Opcode::BoundImageWrite:
+ case IR::Opcode::BindlessImageWrite:
+ return IR::Opcode::ImageWrite;
+ case IR::Opcode::BoundImageAtomicIAdd32:
+ case IR::Opcode::BindlessImageAtomicIAdd32:
+ return IR::Opcode::ImageAtomicIAdd32;
+ case IR::Opcode::BoundImageAtomicSMin32:
+ case IR::Opcode::BindlessImageAtomicSMin32:
+ return IR::Opcode::ImageAtomicSMin32;
+ case IR::Opcode::BoundImageAtomicUMin32:
+ case IR::Opcode::BindlessImageAtomicUMin32:
+ return IR::Opcode::ImageAtomicUMin32;
+ case IR::Opcode::BoundImageAtomicSMax32:
+ case IR::Opcode::BindlessImageAtomicSMax32:
+ return IR::Opcode::ImageAtomicSMax32;
+ case IR::Opcode::BoundImageAtomicUMax32:
+ case IR::Opcode::BindlessImageAtomicUMax32:
+ return IR::Opcode::ImageAtomicUMax32;
+ case IR::Opcode::BoundImageAtomicInc32:
+ case IR::Opcode::BindlessImageAtomicInc32:
+ return IR::Opcode::ImageAtomicInc32;
+ case IR::Opcode::BoundImageAtomicDec32:
+ case IR::Opcode::BindlessImageAtomicDec32:
+ return IR::Opcode::ImageAtomicDec32;
+ case IR::Opcode::BoundImageAtomicAnd32:
+ case IR::Opcode::BindlessImageAtomicAnd32:
+ return IR::Opcode::ImageAtomicAnd32;
+ case IR::Opcode::BoundImageAtomicOr32:
+ case IR::Opcode::BindlessImageAtomicOr32:
+ return IR::Opcode::ImageAtomicOr32;
+ case IR::Opcode::BoundImageAtomicXor32:
+ case IR::Opcode::BindlessImageAtomicXor32:
+ return IR::Opcode::ImageAtomicXor32;
+ case IR::Opcode::BoundImageAtomicExchange32:
+ case IR::Opcode::BindlessImageAtomicExchange32:
+ return IR::Opcode::ImageAtomicExchange32;
+ default:
+ return IR::Opcode::Void;
+ }
+}
+
+bool IsBindless(const IR::Inst& inst) {
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::BindlessImageSampleImplicitLod:
+ case IR::Opcode::BindlessImageSampleExplicitLod:
+ case IR::Opcode::BindlessImageSampleDrefImplicitLod:
+ case IR::Opcode::BindlessImageSampleDrefExplicitLod:
+ case IR::Opcode::BindlessImageGather:
+ case IR::Opcode::BindlessImageGatherDref:
+ case IR::Opcode::BindlessImageFetch:
+ case IR::Opcode::BindlessImageQueryDimensions:
+ case IR::Opcode::BindlessImageQueryLod:
+ case IR::Opcode::BindlessImageGradient:
+ case IR::Opcode::BindlessImageRead:
+ case IR::Opcode::BindlessImageWrite:
+ case IR::Opcode::BindlessImageAtomicIAdd32:
+ case IR::Opcode::BindlessImageAtomicSMin32:
+ case IR::Opcode::BindlessImageAtomicUMin32:
+ case IR::Opcode::BindlessImageAtomicSMax32:
+ case IR::Opcode::BindlessImageAtomicUMax32:
+ case IR::Opcode::BindlessImageAtomicInc32:
+ case IR::Opcode::BindlessImageAtomicDec32:
+ case IR::Opcode::BindlessImageAtomicAnd32:
+ case IR::Opcode::BindlessImageAtomicOr32:
+ case IR::Opcode::BindlessImageAtomicXor32:
+ case IR::Opcode::BindlessImageAtomicExchange32:
+ return true;
+ case IR::Opcode::BoundImageSampleImplicitLod:
+ case IR::Opcode::BoundImageSampleExplicitLod:
+ case IR::Opcode::BoundImageSampleDrefImplicitLod:
+ case IR::Opcode::BoundImageSampleDrefExplicitLod:
+ case IR::Opcode::BoundImageGather:
+ case IR::Opcode::BoundImageGatherDref:
+ case IR::Opcode::BoundImageFetch:
+ case IR::Opcode::BoundImageQueryDimensions:
+ case IR::Opcode::BoundImageQueryLod:
+ case IR::Opcode::BoundImageGradient:
+ case IR::Opcode::BoundImageRead:
+ case IR::Opcode::BoundImageWrite:
+ case IR::Opcode::BoundImageAtomicIAdd32:
+ case IR::Opcode::BoundImageAtomicSMin32:
+ case IR::Opcode::BoundImageAtomicUMin32:
+ case IR::Opcode::BoundImageAtomicSMax32:
+ case IR::Opcode::BoundImageAtomicUMax32:
+ case IR::Opcode::BoundImageAtomicInc32:
+ case IR::Opcode::BoundImageAtomicDec32:
+ case IR::Opcode::BoundImageAtomicAnd32:
+ case IR::Opcode::BoundImageAtomicOr32:
+ case IR::Opcode::BoundImageAtomicXor32:
+ case IR::Opcode::BoundImageAtomicExchange32:
+ return false;
+ default:
+ throw InvalidArgument("Invalid opcode {}", inst.GetOpcode());
+ }
+}
+
+bool IsTextureInstruction(const IR::Inst& inst) {
+ return IndexedInstruction(inst) != IR::Opcode::Void;
+}
+
+std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst);
+
+std::optional<ConstBufferAddr> Track(const IR::Value& value) {
+ return IR::BreadthFirstSearch(value, TryGetConstBuffer);
+}
+
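+// Resolves the constant buffer location a texture handle was read from by
+// walking the IR backwards. Three shapes are recognized: a GetCbuf with an
+// immediate offset, a GetCbuf whose offset is "immediate + dynamic" (indexed
+// texture arrays), and the bitwise OR of two GetCbuf reads (separate sampler
+// and texture handles).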
+std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst) {
+ switch (inst->GetOpcode()) {
+ default:
+ return std::nullopt;
+ case IR::Opcode::BitwiseOr32: {
+ std::optional lhs{Track(inst->Arg(0))};
+ std::optional rhs{Track(inst->Arg(1))};
+ if (!lhs || !rhs) {
+ return std::nullopt;
+ }
+ if (lhs->has_secondary || rhs->has_secondary) {
+ return std::nullopt;
+ }
+ if (lhs->count > 1 || rhs->count > 1) {
+ return std::nullopt;
+ }
+ if (lhs->index > rhs->index || lhs->offset > rhs->offset) {
+ std::swap(lhs, rhs);
+ }
+ return ConstBufferAddr{
+ .index = lhs->index,
+ .offset = lhs->offset,
+ .secondary_index = rhs->index,
+ .secondary_offset = rhs->offset,
+ .dynamic_offset = {},
+ .count = 1,
+ .has_secondary = true,
+ };
+ }
+ case IR::Opcode::GetCbufU32x2:
+ case IR::Opcode::GetCbufU32:
+ break;
+ }
+ const IR::Value index{inst->Arg(0)};
+ const IR::Value offset{inst->Arg(1)};
+ if (!index.IsImmediate()) {
+ // Reading a bindless texture from variable indices is valid
+ // but not supported here at the moment
+ return std::nullopt;
+ }
+ if (offset.IsImmediate()) {
+ return ConstBufferAddr{
+ .index = index.U32(),
+ .offset = offset.U32(),
+ .secondary_index = 0,
+ .secondary_offset = 0,
+ .dynamic_offset = {},
+ .count = 1,
+ .has_secondary = false,
+ };
+ }
+ IR::Inst* const offset_inst{offset.InstRecursive()};
+ if (offset_inst->GetOpcode() != IR::Opcode::IAdd32) {
+ return std::nullopt;
+ }
+ u32 base_offset{};
+ IR::U32 dynamic_offset;
+ if (offset_inst->Arg(0).IsImmediate()) {
+ base_offset = offset_inst->Arg(0).U32();
+ dynamic_offset = IR::U32{offset_inst->Arg(1)};
+ } else if (offset_inst->Arg(1).IsImmediate()) {
+ base_offset = offset_inst->Arg(1).U32();
+ dynamic_offset = IR::U32{offset_inst->Arg(0)};
+ } else {
+ return std::nullopt;
+ }
+ return ConstBufferAddr{
+ .index = index.U32(),
+ .offset = base_offset,
+ .secondary_index = 0,
+ .secondary_offset = 0,
+ .dynamic_offset = dynamic_offset,
+ .count = 8,
+ .has_secondary = false,
+ };
+}
+
+TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) {
+ ConstBufferAddr addr;
+ if (IsBindless(inst)) {
+ const std::optional<ConstBufferAddr> track_addr{Track(inst.Arg(0))};
+ if (!track_addr) {
+ throw NotImplementedException("Failed to track bindless texture constant buffer");
+ }
+ addr = *track_addr;
+ } else {
+ addr = ConstBufferAddr{
+ .index = env.TextureBoundBuffer(),
+ .offset = inst.Arg(0).U32(),
+ .secondary_index = 0,
+ .secondary_offset = 0,
+ .dynamic_offset = {},
+ .count = 1,
+ .has_secondary = false,
+ };
+ }
+ return TextureInst{
+ .cbuf = addr,
+ .inst = &inst,
+ .block = block,
+ };
+}
+
+TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) {
+ const u32 secondary_index{cbuf.has_secondary ? cbuf.secondary_index : cbuf.index};
+ const u32 secondary_offset{cbuf.has_secondary ? cbuf.secondary_offset : cbuf.offset};
+ const u32 lhs_raw{env.ReadCbufValue(cbuf.index, cbuf.offset)};
+ const u32 rhs_raw{env.ReadCbufValue(secondary_index, secondary_offset)};
+ return env.ReadTextureType(lhs_raw | rhs_raw);
+}
+
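+// Accumulates the descriptors used by a shader into the Info structure,
+// deduplicating entries that reference the same constant buffer location.
+// Add() returns the index of the (possibly pre-existing) descriptor so the
+// calling instruction can be patched to reference it.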
+class Descriptors {
+public:
+ explicit Descriptors(TextureBufferDescriptors& texture_buffer_descriptors_,
+ ImageBufferDescriptors& image_buffer_descriptors_,
+ TextureDescriptors& texture_descriptors_,
+ ImageDescriptors& image_descriptors_)
+ : texture_buffer_descriptors{texture_buffer_descriptors_},
+ image_buffer_descriptors{image_buffer_descriptors_},
+ texture_descriptors{texture_descriptors_}, image_descriptors{image_descriptors_} {}
+
+ u32 Add(const TextureBufferDescriptor& desc) {
+ return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) {
+ return desc.cbuf_index == existing.cbuf_index &&
+ desc.cbuf_offset == existing.cbuf_offset &&
+ desc.secondary_cbuf_index == existing.secondary_cbuf_index &&
+ desc.secondary_cbuf_offset == existing.secondary_cbuf_offset &&
+ desc.count == existing.count && desc.size_shift == existing.size_shift &&
+ desc.has_secondary == existing.has_secondary;
+ });
+ }
+
+ u32 Add(const ImageBufferDescriptor& desc) {
+ const u32 index{Add(image_buffer_descriptors, desc, [&desc](const auto& existing) {
+ return desc.format == existing.format && desc.cbuf_index == existing.cbuf_index &&
+ desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count &&
+ desc.size_shift == existing.size_shift;
+ })};
+ image_buffer_descriptors[index].is_written |= desc.is_written;
+ image_buffer_descriptors[index].is_read |= desc.is_read;
+ return index;
+ }
+
+ u32 Add(const TextureDescriptor& desc) {
+ return Add(texture_descriptors, desc, [&desc](const auto& existing) {
+ return desc.type == existing.type && desc.is_depth == existing.is_depth &&
+ desc.has_secondary == existing.has_secondary &&
+ desc.cbuf_index == existing.cbuf_index &&
+ desc.cbuf_offset == existing.cbuf_offset &&
+ desc.secondary_cbuf_index == existing.secondary_cbuf_index &&
+ desc.secondary_cbuf_offset == existing.secondary_cbuf_offset &&
+ desc.count == existing.count && desc.size_shift == existing.size_shift;
+ });
+ }
+
+ u32 Add(const ImageDescriptor& desc) {
+ const u32 index{Add(image_descriptors, desc, [&desc](const auto& existing) {
+ return desc.type == existing.type && desc.format == existing.format &&
+ desc.cbuf_index == existing.cbuf_index &&
+ desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count &&
+ desc.size_shift == existing.size_shift;
+ })};
+ image_descriptors[index].is_written |= desc.is_written;
+ image_descriptors[index].is_read |= desc.is_read;
+ return index;
+ }
+
+private:
+ template <typename Descriptors, typename Descriptor, typename Func>
+ static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) {
+ // TODO: Handle arrays
+ const auto it{std::ranges::find_if(descriptors, pred)};
+ if (it != descriptors.end()) {
+ return static_cast<u32>(std::distance(descriptors.begin(), it));
+ }
+ descriptors.push_back(desc);
+ return static_cast<u32>(descriptors.size()) - 1;
+ }
+
+ TextureBufferDescriptors& texture_buffer_descriptors;
+ ImageBufferDescriptors& image_buffer_descriptors;
+ TextureDescriptors& texture_descriptors;
+ ImageDescriptors& image_descriptors;
+};
+} // Anonymous namespace
+
+void TexturePass(Environment& env, IR::Program& program) {
+ TextureInstVector to_replace;
+ for (IR::Block* const block : program.post_order_blocks) {
+ for (IR::Inst& inst : block->Instructions()) {
+ if (!IsTextureInstruction(inst)) {
+ continue;
+ }
+ to_replace.push_back(MakeInst(env, block, inst));
+ }
+ }
+ // Sort instructions to visit textures by constant buffer index, then by offset
+ std::ranges::sort(to_replace, [](const auto& lhs, const auto& rhs) {
+ return lhs.cbuf.offset < rhs.cbuf.offset;
+ });
+ std::stable_sort(to_replace.begin(), to_replace.end(), [](const auto& lhs, const auto& rhs) {
+ return lhs.cbuf.index < rhs.cbuf.index;
+ });
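+    // The stable sort preserves the offset ordering established above within
+    // each constant buffer index, yielding an (index, offset) ordering.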
+ Descriptors descriptors{
+ program.info.texture_buffer_descriptors,
+ program.info.image_buffer_descriptors,
+ program.info.texture_descriptors,
+ program.info.image_descriptors,
+ };
+ for (TextureInst& texture_inst : to_replace) {
+ // TODO: Handle arrays
+ IR::Inst* const inst{texture_inst.inst};
+ inst->ReplaceOpcode(IndexedInstruction(*inst));
+
+ const auto& cbuf{texture_inst.cbuf};
+ auto flags{inst->Flags<IR::TextureInstInfo>()};
+ switch (inst->GetOpcode()) {
+ case IR::Opcode::ImageQueryDimensions:
+ flags.type.Assign(ReadTextureType(env, cbuf));
+ inst->SetFlags(flags);
+ break;
+ case IR::Opcode::ImageFetch:
+ if (flags.type != TextureType::Color1D) {
+ break;
+ }
+ if (ReadTextureType(env, cbuf) == TextureType::Buffer) {
+ // Replace with the bound texture type only when it's a texture buffer
+ // If the instruction is 1D and the bound type is 2D, don't change the code and let
+ // the rasterizer robustness handle it
+ // This happens on Fire Emblem: Three Houses
+ flags.type.Assign(TextureType::Buffer);
+ }
+ break;
+ default:
+ break;
+ }
+ u32 index;
+ switch (inst->GetOpcode()) {
+ case IR::Opcode::ImageRead:
+ case IR::Opcode::ImageAtomicIAdd32:
+ case IR::Opcode::ImageAtomicSMin32:
+ case IR::Opcode::ImageAtomicUMin32:
+ case IR::Opcode::ImageAtomicSMax32:
+ case IR::Opcode::ImageAtomicUMax32:
+ case IR::Opcode::ImageAtomicInc32:
+ case IR::Opcode::ImageAtomicDec32:
+ case IR::Opcode::ImageAtomicAnd32:
+ case IR::Opcode::ImageAtomicOr32:
+ case IR::Opcode::ImageAtomicXor32:
+ case IR::Opcode::ImageAtomicExchange32:
+ case IR::Opcode::ImageWrite: {
+ if (cbuf.has_secondary) {
+ throw NotImplementedException("Unexpected separate sampler");
+ }
+ const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead};
+ const bool is_read{inst->GetOpcode() != IR::Opcode::ImageWrite};
+ if (flags.type == TextureType::Buffer) {
+ index = descriptors.Add(ImageBufferDescriptor{
+ .format = flags.image_format,
+ .is_written = is_written,
+ .is_read = is_read,
+ .cbuf_index = cbuf.index,
+ .cbuf_offset = cbuf.offset,
+ .count = cbuf.count,
+ .size_shift = DESCRIPTOR_SIZE_SHIFT,
+ });
+ } else {
+ index = descriptors.Add(ImageDescriptor{
+ .type = flags.type,
+ .format = flags.image_format,
+ .is_written = is_written,
+ .is_read = is_read,
+ .cbuf_index = cbuf.index,
+ .cbuf_offset = cbuf.offset,
+ .count = cbuf.count,
+ .size_shift = DESCRIPTOR_SIZE_SHIFT,
+ });
+ }
+ break;
+ }
+ default:
+ if (flags.type == TextureType::Buffer) {
+ index = descriptors.Add(TextureBufferDescriptor{
+ .has_secondary = cbuf.has_secondary,
+ .cbuf_index = cbuf.index,
+ .cbuf_offset = cbuf.offset,
+ .secondary_cbuf_index = cbuf.secondary_index,
+ .secondary_cbuf_offset = cbuf.secondary_offset,
+ .count = cbuf.count,
+ .size_shift = DESCRIPTOR_SIZE_SHIFT,
+ });
+ } else {
+ index = descriptors.Add(TextureDescriptor{
+ .type = flags.type,
+ .is_depth = flags.is_depth != 0,
+ .has_secondary = cbuf.has_secondary,
+ .cbuf_index = cbuf.index,
+ .cbuf_offset = cbuf.offset,
+ .secondary_cbuf_index = cbuf.secondary_index,
+ .secondary_cbuf_offset = cbuf.secondary_offset,
+ .count = cbuf.count,
+ .size_shift = DESCRIPTOR_SIZE_SHIFT,
+ });
+ }
+ break;
+ }
+ flags.descriptor_index.Assign(index);
+ inst->SetFlags(flags);
+
+ if (cbuf.count > 1) {
+ const auto insert_point{IR::Block::InstructionList::s_iterator_to(*inst)};
+ IR::IREmitter ir{*texture_inst.block, insert_point};
+ const IR::U32 shift{ir.Imm32(std::countr_zero(DESCRIPTOR_SIZE))};
+ inst->SetArg(0, ir.ShiftRightArithmetic(cbuf.dynamic_offset, shift));
+ } else {
+ inst->SetArg(0, IR::Value{});
+ }
+ }
+}
+
+void JoinTextureInfo(Info& base, Info& source) {
+ Descriptors descriptors{
+ base.texture_buffer_descriptors,
+ base.image_buffer_descriptors,
+ base.texture_descriptors,
+ base.image_descriptors,
+ };
+ for (auto& desc : source.texture_buffer_descriptors) {
+ descriptors.Add(desc);
+ }
+ for (auto& desc : source.image_buffer_descriptors) {
+ descriptors.Add(desc);
+ }
+ for (auto& desc : source.texture_descriptors) {
+ descriptors.Add(desc);
+ }
+ for (auto& desc : source.image_descriptors) {
+ descriptors.Add(desc);
+ }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp
new file mode 100644
index 000000000..975d5aadf
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/verification_pass.cpp
@@ -0,0 +1,98 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <map>
+#include <set>
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+
+static void ValidateTypes(const IR::Program& program) {
+ for (const auto& block : program.blocks) {
+ for (const IR::Inst& inst : *block) {
+ if (inst.GetOpcode() == IR::Opcode::Phi) {
+ // Skip validation on phi nodes
+ continue;
+ }
+ const size_t num_args{inst.NumArgs()};
+ for (size_t i = 0; i < num_args; ++i) {
+ const IR::Type t1{inst.Arg(i).Type()};
+ const IR::Type t2{IR::ArgTypeOf(inst.GetOpcode(), i)};
+ if (!IR::AreTypesCompatible(t1, t2)) {
+ throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(*block));
+ }
+ }
+ }
+ }
+}
+
+static void ValidateUses(const IR::Program& program) {
+ std::map<IR::Inst*, int> actual_uses;
+ for (const auto& block : program.blocks) {
+ for (const IR::Inst& inst : *block) {
+ const size_t num_args{inst.NumArgs()};
+ for (size_t i = 0; i < num_args; ++i) {
+ const IR::Value arg{inst.Arg(i)};
+ if (!arg.IsImmediate()) {
+ ++actual_uses[arg.Inst()];
+ }
+ }
+ }
+ }
+ for (const auto [inst, uses] : actual_uses) {
+ if (inst->UseCount() != uses) {
+ throw LogicError("Invalid uses in block: {}", IR::DumpProgram(program));
+ }
+ }
+}
+
+static void ValidateForwardDeclarations(const IR::Program& program) {
+ std::set<const IR::Inst*> definitions;
+ for (const IR::Block* const block : program.blocks) {
+ for (const IR::Inst& inst : *block) {
+ definitions.emplace(&inst);
+ if (inst.GetOpcode() == IR::Opcode::Phi) {
+ // Phi nodes can have forward declarations
+ continue;
+ }
+ const size_t num_args{inst.NumArgs()};
+ for (size_t arg = 0; arg < num_args; ++arg) {
+ if (inst.Arg(arg).IsImmediate()) {
+ continue;
+ }
+ if (!definitions.contains(inst.Arg(arg).Inst())) {
+ throw LogicError("Forward declaration in block: {}", IR::DumpBlock(*block));
+ }
+ }
+ }
+ }
+}
+
+static void ValidatePhiNodes(const IR::Program& program) {
+ for (const IR::Block* const block : program.blocks) {
+ bool no_more_phis{false};
+ for (const IR::Inst& inst : *block) {
+ if (inst.GetOpcode() == IR::Opcode::Phi) {
+ if (no_more_phis) {
+ throw LogicError("Interleaved phi nodes: {}", IR::DumpBlock(*block));
+ }
+ } else {
+ no_more_phis = true;
+ }
+ }
+ }
+}
+
+void VerificationPass(const IR::Program& program) {
+ ValidateTypes(program);
+ ValidateUses(program);
+ ValidateForwardDeclarations(program);
+ ValidatePhiNodes(program);
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/object_pool.h b/src/shader_recompiler/object_pool.h
new file mode 100644
index 000000000..f8b255b66
--- /dev/null
+++ b/src/shader_recompiler/object_pool.h
@@ -0,0 +1,104 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+namespace Shader {
+
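+/// Arena-style allocator: objects are carved out of fixed-size chunks and are
+/// only destroyed in bulk through ReleaseContents(), never individually.
+///
+/// Minimal usage sketch (the element type and constructor arguments are
+/// illustrative; any destructible type works):
+///
+///   ObjectPool<IR::Inst> inst_pool;
+///   IR::Inst* const inst{inst_pool.Create(IR::Opcode::Identity, 0)};
+///   // ... use the object while building the program ...
+///   inst_pool.ReleaseContents(); // destroys every object, keeps the storage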
+template <typename T>
+requires std::is_destructible_v<T> class ObjectPool {
+public:
+ explicit ObjectPool(size_t chunk_size = 8192) : new_chunk_size{chunk_size} {
+ node = &chunks.emplace_back(new_chunk_size);
+ }
+
+ template <typename... Args>
+ requires std::is_constructible_v<T, Args...>[[nodiscard]] T* Create(Args&&... args) {
+ return std::construct_at(Memory(), std::forward<Args>(args)...);
+ }
+
+ void ReleaseContents() {
+ if (chunks.empty()) {
+ return;
+ }
+ Chunk& root{chunks.front()};
+ if (root.used_objects == root.num_objects) {
+            // Root chunk has been filled; squash allocations into it
+ const size_t total_objects{root.num_objects + new_chunk_size * (chunks.size() - 1)};
+ chunks.clear();
+ chunks.emplace_back(total_objects);
+ } else {
+ root.Release();
+ chunks.resize(1);
+ }
+ chunks.shrink_to_fit();
+ node = &chunks.front();
+ }
+
+private:
+ struct NonTrivialDummy {
+ NonTrivialDummy() noexcept {}
+ };
+
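+    // Raw storage for a single T. The union together with the non-trivial
+    // dummy member prevents default construction of the object; lifetimes
+    // are managed explicitly with std::construct_at in Create() and
+    // std::destroy_n in Release().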
+ union Storage {
+ Storage() noexcept {}
+ ~Storage() noexcept {}
+
+ NonTrivialDummy dummy{};
+ T object;
+ };
+
+ struct Chunk {
+ explicit Chunk() = default;
+ explicit Chunk(size_t size)
+ : num_objects{size}, storage{std::make_unique<Storage[]>(size)} {}
+
+ Chunk& operator=(Chunk&& rhs) noexcept {
+ Release();
+ used_objects = std::exchange(rhs.used_objects, 0);
+ num_objects = std::exchange(rhs.num_objects, 0);
+            storage = std::move(rhs.storage);
+            return *this;
+        }
+
+ Chunk(Chunk&& rhs) noexcept
+ : used_objects{std::exchange(rhs.used_objects, 0)},
+ num_objects{std::exchange(rhs.num_objects, 0)}, storage{std::move(rhs.storage)} {}
+
+ ~Chunk() {
+ Release();
+ }
+
+ void Release() {
+ std::destroy_n(storage.get(), used_objects);
+ used_objects = 0;
+ }
+
+ size_t used_objects{};
+ size_t num_objects{};
+ std::unique_ptr<Storage[]> storage;
+ };
+
+ [[nodiscard]] T* Memory() {
+ Chunk* const chunk{FreeChunk()};
+ return &chunk->storage[chunk->used_objects++].object;
+ }
+
+ [[nodiscard]] Chunk* FreeChunk() {
+ if (node->used_objects != node->num_objects) {
+ return node;
+ }
+ node = &chunks.emplace_back(new_chunk_size);
+ return node;
+ }
+
+ Chunk* node{};
+ std::vector<Chunk> chunks;
+ size_t new_chunk_size{};
+};
+
+} // namespace Shader
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
new file mode 100644
index 000000000..f0c3b3b17
--- /dev/null
+++ b/src/shader_recompiler/profile.h
@@ -0,0 +1,74 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Shader {
+
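+/// Host capabilities and driver quirks that the shader backends generate
+/// code against.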
+struct Profile {
+ u32 supported_spirv{0x00010000};
+
+ bool unified_descriptor_binding{};
+ bool support_descriptor_aliasing{};
+ bool support_int8{};
+ bool support_int16{};
+ bool support_int64{};
+ bool support_vertex_instance_id{};
+ bool support_float_controls{};
+ bool support_separate_denorm_behavior{};
+ bool support_separate_rounding_mode{};
+ bool support_fp16_denorm_preserve{};
+ bool support_fp32_denorm_preserve{};
+ bool support_fp16_denorm_flush{};
+ bool support_fp32_denorm_flush{};
+ bool support_fp16_signed_zero_nan_preserve{};
+ bool support_fp32_signed_zero_nan_preserve{};
+ bool support_fp64_signed_zero_nan_preserve{};
+ bool support_explicit_workgroup_layout{};
+ bool support_vote{};
+ bool support_viewport_index_layer_non_geometry{};
+ bool support_viewport_mask{};
+ bool support_typeless_image_loads{};
+ bool support_demote_to_helper_invocation{};
+ bool support_int64_atomics{};
+ bool support_derivative_control{};
+ bool support_geometry_shader_passthrough{};
+ bool support_gl_nv_gpu_shader_5{};
+ bool support_gl_amd_gpu_shader_half_float{};
+ bool support_gl_texture_shadow_lod{};
+ bool support_gl_warp_intrinsics{};
+ bool support_gl_variable_aoffi{};
+ bool support_gl_sparse_textures{};
+ bool support_gl_derivative_control{};
+
+ bool warp_size_potentially_larger_than_guest{};
+
+ bool lower_left_origin_mode{};
+    /// Fragment outputs have to be declared even when not written, to avoid undefined values.
+ /// See Ori and the Blind Forest's main menu for reference.
+ bool need_declared_frag_colors{};
+ /// Prevents fast math optimizations that may cause inaccuracies
+ bool need_fastmath_off{};
+
+ /// OpFClamp is broken and OpFMax + OpFMin should be used instead
+ bool has_broken_spirv_clamp{};
+ /// Offset image operands with an unsigned type do not work
+ bool has_broken_unsigned_image_offsets{};
+ /// Signed instructions with unsigned data types are misinterpreted
+ bool has_broken_signed_operations{};
+ /// Float controls break when fp16 is enabled
+ bool has_broken_fp16_float_controls{};
+ /// Dynamic vec4 indexing is broken on some OpenGL drivers
+ bool has_gl_component_indexing_bug{};
+ /// The precise type qualifier is broken in the fragment stage of some drivers
+ bool has_gl_precise_bug{};
+ /// Ignores SPIR-V ordered vs unordered using GLSL semantics
+ bool ignore_nan_fp_comparisons{};
+
+ u32 gl_max_compute_smem_size{};
+};
+
+} // namespace Shader
diff --git a/src/shader_recompiler/program_header.h b/src/shader_recompiler/program_header.h
new file mode 100644
index 000000000..bd6c2bfb5
--- /dev/null
+++ b/src/shader_recompiler/program_header.h
@@ -0,0 +1,219 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+
+namespace Shader {
+
+enum class OutputTopology : u32 {
+ PointList = 1,
+ LineStrip = 6,
+ TriangleStrip = 7,
+};
+
+enum class PixelImap : u8 {
+ Unused = 0,
+ Constant = 1,
+ Perspective = 2,
+ ScreenLinear = 3,
+};
+
+// Documentation in:
+// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html
+struct ProgramHeader {
+ union {
+ BitField<0, 5, u32> sph_type;
+ BitField<5, 5, u32> version;
+ BitField<10, 4, u32> shader_type;
+ BitField<14, 1, u32> mrt_enable;
+ BitField<15, 1, u32> kills_pixels;
+ BitField<16, 1, u32> does_global_store;
+ BitField<17, 4, u32> sass_version;
+ BitField<21, 2, u32> reserved1;
+ BitField<24, 1, u32> geometry_passthrough;
+ BitField<25, 1, u32> reserved2;
+ BitField<26, 1, u32> does_load_or_store;
+ BitField<27, 1, u32> does_fp64;
+ BitField<28, 4, u32> stream_out_mask;
+ } common0;
+
+ union {
+ BitField<0, 24, u32> shader_local_memory_low_size;
+ BitField<24, 8, u32> per_patch_attribute_count;
+ } common1;
+
+ union {
+ BitField<0, 24, u32> shader_local_memory_high_size;
+ BitField<24, 8, u32> threads_per_input_primitive;
+ } common2;
+
+ union {
+ BitField<0, 24, u32> shader_local_memory_crs_size;
+ BitField<24, 4, OutputTopology> output_topology;
+ BitField<28, 4, u32> reserved;
+ } common3;
+
+ union {
+ BitField<0, 12, u32> max_output_vertices;
+ BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
+ BitField<20, 4, u32> reserved;
+ BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
+ } common4;
+
+ union {
+ struct {
+ INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
+
+ union {
+ BitField<0, 1, u8> primitive_array_id;
+ BitField<1, 1, u8> rt_array_index;
+ BitField<2, 1, u8> viewport_index;
+ BitField<3, 1, u8> point_size;
+ BitField<4, 1, u8> position_x;
+ BitField<5, 1, u8> position_y;
+ BitField<6, 1, u8> position_z;
+ BitField<7, 1, u8> position_w;
+ u8 raw;
+ } imap_systemb;
+
+ std::array<u8, 16> imap_generic_vector;
+
+ INSERT_PADDING_BYTES_NOINIT(2); // ImapColor
+ union {
+ BitField<0, 8, u16> clip_distances;
+ BitField<8, 1, u16> point_sprite_s;
+ BitField<9, 1, u16> point_sprite_t;
+ BitField<10, 1, u16> fog_coordinate;
+ BitField<12, 1, u16> tessellation_eval_point_u;
+ BitField<13, 1, u16> tessellation_eval_point_v;
+ BitField<14, 1, u16> instance_id;
+ BitField<15, 1, u16> vertex_id;
+ };
+ INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10]
+ INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved
+ INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA
+
+ union {
+ BitField<0, 1, u8> primitive_array_id;
+ BitField<1, 1, u8> rt_array_index;
+ BitField<2, 1, u8> viewport_index;
+ BitField<3, 1, u8> point_size;
+ BitField<4, 1, u8> position_x;
+ BitField<5, 1, u8> position_y;
+ BitField<6, 1, u8> position_z;
+ BitField<7, 1, u8> position_w;
+ u8 raw;
+ } omap_systemb;
+
+ std::array<u8, 16> omap_generic_vector;
+
+ INSERT_PADDING_BYTES_NOINIT(2); // OmapColor
+
+ union {
+ BitField<0, 8, u16> clip_distances;
+ BitField<8, 1, u16> point_sprite_s;
+ BitField<9, 1, u16> point_sprite_t;
+ BitField<10, 1, u16> fog_coordinate;
+ BitField<12, 1, u16> tessellation_eval_point_u;
+ BitField<13, 1, u16> tessellation_eval_point_v;
+ BitField<14, 1, u16> instance_id;
+ BitField<15, 1, u16> vertex_id;
+ } omap_systemc;
+
+ INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10]
+ INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved
+
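+            // Each generic attribute occupies one nibble of the imap/omap
+            // vectors: one enable bit per component, two attributes per byte.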
+ [[nodiscard]] std::array<bool, 4> InputGeneric(size_t index) const noexcept {
+ const int data{imap_generic_vector[index >> 1] >> ((index % 2) * 4)};
+ return {
+ (data & 1) != 0,
+ (data & 2) != 0,
+ (data & 4) != 0,
+ (data & 8) != 0,
+ };
+ }
+
+ [[nodiscard]] std::array<bool, 4> OutputGeneric(size_t index) const noexcept {
+ const int data{omap_generic_vector[index >> 1] >> ((index % 2) * 4)};
+ return {
+ (data & 1) != 0,
+ (data & 2) != 0,
+ (data & 4) != 0,
+ (data & 8) != 0,
+ };
+ }
+ } vtg;
+
+ struct {
+ INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
+
+ union {
+ BitField<0, 1, u8> primitive_array_id;
+ BitField<1, 1, u8> rt_array_index;
+ BitField<2, 1, u8> viewport_index;
+ BitField<3, 1, u8> point_size;
+ BitField<4, 1, u8> position_x;
+ BitField<5, 1, u8> position_y;
+ BitField<6, 1, u8> position_z;
+ BitField<7, 1, u8> position_w;
+ BitField<0, 4, u8> first;
+ BitField<4, 4, u8> position;
+ u8 raw;
+ } imap_systemb;
+
+ union {
+ BitField<0, 2, PixelImap> x;
+ BitField<2, 2, PixelImap> y;
+ BitField<4, 2, PixelImap> z;
+ BitField<6, 2, PixelImap> w;
+ u8 raw;
+ } imap_generic_vector[32];
+
+ INSERT_PADDING_BYTES_NOINIT(2); // ImapColor
+ INSERT_PADDING_BYTES_NOINIT(2); // ImapSystemValuesC
+ INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10]
+ INSERT_PADDING_BYTES_NOINIT(2); // ImapReserved
+
+ struct {
+ u32 target;
+ union {
+ BitField<0, 1, u32> sample_mask;
+ BitField<1, 1, u32> depth;
+ BitField<2, 30, u32> reserved;
+ };
+ } omap;
+
+ [[nodiscard]] std::array<bool, 4> EnabledOutputComponents(u32 rt) const noexcept {
+ const u32 bits{omap.target >> (rt * 4)};
+ return {(bits & 1) != 0, (bits & 2) != 0, (bits & 4) != 0, (bits & 8) != 0};
+ }
+
+ [[nodiscard]] std::array<PixelImap, 4> GenericInputMap(u32 attribute) const {
+ const auto& vector{imap_generic_vector[attribute]};
+ return {vector.x, vector.y, vector.z, vector.w};
+ }
+
+ [[nodiscard]] bool IsGenericVectorActive(size_t index) const {
+ return imap_generic_vector[index].raw != 0;
+ }
+ } ps;
+
+ std::array<u32, 0xf> raw;
+ };
+
+ [[nodiscard]] u64 LocalMemorySize() const noexcept {
+ return static_cast<u64>(common1.shader_local_memory_low_size) |
+ (static_cast<u64>(common2.shader_local_memory_high_size) << 24);
+ }
+};
+static_assert(sizeof(ProgramHeader) == 0x50, "Incorrect structure size");
+
+} // namespace Shader
diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h
new file mode 100644
index 000000000..f3f83a258
--- /dev/null
+++ b/src/shader_recompiler/runtime_info.h
@@ -0,0 +1,88 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <bitset>
+#include <optional>
+#include <vector>
+
+#include "common/common_types.h"
+#include "shader_recompiler/varying_state.h"
+
+namespace Shader {
+
+enum class AttributeType : u8 {
+ Float,
+ SignedInt,
+ UnsignedInt,
+ Disabled,
+};
+
+enum class InputTopology {
+ Points,
+ Lines,
+ LinesAdjacency,
+ Triangles,
+ TrianglesAdjacency,
+};
+
+enum class CompareFunction {
+ Never,
+ Less,
+ Equal,
+ LessThanEqual,
+ Greater,
+ NotEqual,
+ GreaterThanEqual,
+ Always,
+};
+
+enum class TessPrimitive {
+ Isolines,
+ Triangles,
+ Quads,
+};
+
+enum class TessSpacing {
+ Equal,
+ FractionalOdd,
+ FractionalEven,
+};
+
+struct TransformFeedbackVarying {
+ u32 buffer{};
+ u32 stride{};
+ u32 offset{};
+ u32 components{};
+};
+
+struct RuntimeInfo {
+ std::array<AttributeType, 32> generic_input_types{};
+ VaryingState previous_stage_stores;
+
+ bool convert_depth_mode{};
+ bool force_early_z{};
+
+ TessPrimitive tess_primitive{};
+ TessSpacing tess_spacing{};
+ bool tess_clockwise{};
+
+ InputTopology input_topology{};
+
+ std::optional<float> fixed_state_point_size;
+ std::optional<CompareFunction> alpha_test_func;
+ float alpha_test_reference{};
+
+ /// Static Y negate value
+ bool y_negate{};
+ /// Use storage buffers instead of global pointers on GLASM
+ bool glasm_use_storage_buffers{};
+
+ /// Transform feedback state for each varying
+ std::vector<TransformFeedbackVarying> xfb_varyings;
+};
+
+} // namespace Shader
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
new file mode 100644
index 000000000..4ef4dbd40
--- /dev/null
+++ b/src/shader_recompiler/shader_info.h
@@ -0,0 +1,193 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <bitset>
+
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/type.h"
+#include "shader_recompiler/varying_state.h"
+
+#include <boost/container/small_vector.hpp>
+#include <boost/container/static_vector.hpp>
+
+namespace Shader {
+
+enum class TextureType : u32 {
+ Color1D,
+ ColorArray1D,
+ Color2D,
+ ColorArray2D,
+ Color3D,
+ ColorCube,
+ ColorArrayCube,
+ Buffer,
+};
+constexpr u32 NUM_TEXTURE_TYPES = 8;
+
+enum class ImageFormat : u32 {
+ Typeless,
+ R8_UINT,
+ R8_SINT,
+ R16_UINT,
+ R16_SINT,
+ R32_UINT,
+ R32G32_UINT,
+ R32G32B32A32_UINT,
+};
+
+enum class Interpolation {
+ Smooth,
+ Flat,
+ NoPerspective,
+};
+
+struct ConstantBufferDescriptor {
+ u32 index;
+ u32 count;
+};
+
+struct StorageBufferDescriptor {
+ u32 cbuf_index;
+ u32 cbuf_offset;
+ u32 count;
+ bool is_written;
+};
+
+struct TextureBufferDescriptor {
+ bool has_secondary;
+ u32 cbuf_index;
+ u32 cbuf_offset;
+ u32 secondary_cbuf_index;
+ u32 secondary_cbuf_offset;
+ u32 count;
+ u32 size_shift;
+};
+using TextureBufferDescriptors = boost::container::small_vector<TextureBufferDescriptor, 6>;
+
+struct ImageBufferDescriptor {
+ ImageFormat format;
+ bool is_written;
+ bool is_read;
+ u32 cbuf_index;
+ u32 cbuf_offset;
+ u32 count;
+ u32 size_shift;
+};
+using ImageBufferDescriptors = boost::container::small_vector<ImageBufferDescriptor, 2>;
+
+struct TextureDescriptor {
+ TextureType type;
+ bool is_depth;
+ bool has_secondary;
+ u32 cbuf_index;
+ u32 cbuf_offset;
+ u32 secondary_cbuf_index;
+ u32 secondary_cbuf_offset;
+ u32 count;
+ u32 size_shift;
+};
+using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>;
+
+struct ImageDescriptor {
+ TextureType type;
+ ImageFormat format;
+ bool is_written;
+ bool is_read;
+ u32 cbuf_index;
+ u32 cbuf_offset;
+ u32 count;
+ u32 size_shift;
+};
+using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>;
+
+struct Info {
+ static constexpr size_t MAX_CBUFS{18};
+ static constexpr size_t MAX_SSBOS{32};
+
+ bool uses_workgroup_id{};
+ bool uses_local_invocation_id{};
+ bool uses_invocation_id{};
+ bool uses_sample_id{};
+ bool uses_is_helper_invocation{};
+ bool uses_subgroup_invocation_id{};
+ bool uses_subgroup_shuffles{};
+ std::array<bool, 30> uses_patches{};
+
+ std::array<Interpolation, 32> interpolation{};
+ VaryingState loads;
+ VaryingState stores;
+ VaryingState passthrough;
+
+ bool loads_indexed_attributes{};
+
+ std::array<bool, 8> stores_frag_color{};
+ bool stores_sample_mask{};
+ bool stores_frag_depth{};
+
+ bool stores_tess_level_outer{};
+ bool stores_tess_level_inner{};
+
+ bool stores_indexed_attributes{};
+
+ bool stores_global_memory{};
+
+ bool uses_fp16{};
+ bool uses_fp64{};
+ bool uses_fp16_denorms_flush{};
+ bool uses_fp16_denorms_preserve{};
+ bool uses_fp32_denorms_flush{};
+ bool uses_fp32_denorms_preserve{};
+ bool uses_int8{};
+ bool uses_int16{};
+ bool uses_int64{};
+ bool uses_image_1d{};
+ bool uses_sampled_1d{};
+ bool uses_sparse_residency{};
+ bool uses_demote_to_helper_invocation{};
+ bool uses_subgroup_vote{};
+ bool uses_subgroup_mask{};
+ bool uses_fswzadd{};
+ bool uses_derivatives{};
+ bool uses_typeless_image_reads{};
+ bool uses_typeless_image_writes{};
+ bool uses_image_buffers{};
+ bool uses_shared_increment{};
+ bool uses_shared_decrement{};
+ bool uses_global_increment{};
+ bool uses_global_decrement{};
+ bool uses_atomic_f32_add{};
+ bool uses_atomic_f16x2_add{};
+ bool uses_atomic_f16x2_min{};
+ bool uses_atomic_f16x2_max{};
+ bool uses_atomic_f32x2_add{};
+ bool uses_atomic_f32x2_min{};
+ bool uses_atomic_f32x2_max{};
+ bool uses_atomic_s32_min{};
+ bool uses_atomic_s32_max{};
+ bool uses_int64_bit_atomics{};
+ bool uses_global_memory{};
+ bool uses_atomic_image_u32{};
+ bool uses_shadow_lod{};
+
+ IR::Type used_constant_buffer_types{};
+ IR::Type used_storage_buffer_types{};
+
+ u32 constant_buffer_mask{};
+ std::array<u32, MAX_CBUFS> constant_buffer_used_sizes{};
+ u32 nvn_buffer_base{};
+ std::bitset<16> nvn_buffer_used{};
+
+ boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS>
+ constant_buffer_descriptors;
+ boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS> storage_buffers_descriptors;
+ TextureBufferDescriptors texture_buffer_descriptors;
+ ImageBufferDescriptors image_buffer_descriptors;
+ TextureDescriptors texture_descriptors;
+ ImageDescriptors image_descriptors;
+};
+
+} // namespace Shader
diff --git a/src/shader_recompiler/stage.h b/src/shader_recompiler/stage.h
new file mode 100644
index 000000000..5c1c8d8fc
--- /dev/null
+++ b/src/shader_recompiler/stage.h
@@ -0,0 +1,28 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Shader {
+
+enum class Stage : u32 {
+ VertexB,
+ TessellationControl,
+ TessellationEval,
+ Geometry,
+ Fragment,
+
+ Compute,
+
+ VertexA,
+};
+constexpr u32 MaxStageTypes = 6;
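+// VertexA is excluded from the count: dual vertex programs (VertexA plus
+// VertexB) are merged into a single VertexB stage before code generation.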
+
+[[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept {
+ return static_cast<Stage>(static_cast<size_t>(Stage::VertexB) + index);
+}
+
+} // namespace Shader
diff --git a/src/shader_recompiler/varying_state.h b/src/shader_recompiler/varying_state.h
new file mode 100644
index 000000000..9d7b24a76
--- /dev/null
+++ b/src/shader_recompiler/varying_state.h
@@ -0,0 +1,69 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <bitset>
+#include <cstddef>
+
+#include "shader_recompiler/frontend/ir/attribute.h"
+
+namespace Shader {
+
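+/// Bitmask of varyings, one bit per IR::Attribute component, with helpers to
+/// query whole four-component attributes and the legacy fixed-function
+/// varyings.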
+struct VaryingState {
+ std::bitset<256> mask{};
+
+ void Set(IR::Attribute attribute, bool state = true) {
+ mask[static_cast<size_t>(attribute)] = state;
+ }
+
+ [[nodiscard]] bool operator[](IR::Attribute attribute) const noexcept {
+ return mask[static_cast<size_t>(attribute)];
+ }
+
+ [[nodiscard]] bool AnyComponent(IR::Attribute base) const noexcept {
+ return mask[static_cast<size_t>(base) + 0] || mask[static_cast<size_t>(base) + 1] ||
+ mask[static_cast<size_t>(base) + 2] || mask[static_cast<size_t>(base) + 3];
+ }
+
+ [[nodiscard]] bool AllComponents(IR::Attribute base) const noexcept {
+ return mask[static_cast<size_t>(base) + 0] && mask[static_cast<size_t>(base) + 1] &&
+ mask[static_cast<size_t>(base) + 2] && mask[static_cast<size_t>(base) + 3];
+ }
+
+ [[nodiscard]] bool IsUniform(IR::Attribute base) const noexcept {
+ return AnyComponent(base) == AllComponents(base);
+ }
+
+ [[nodiscard]] bool Generic(size_t index, size_t component) const noexcept {
+ return mask[static_cast<size_t>(IR::Attribute::Generic0X) + index * 4 + component];
+ }
+
+ [[nodiscard]] bool Generic(size_t index) const noexcept {
+ return Generic(index, 0) || Generic(index, 1) || Generic(index, 2) || Generic(index, 3);
+ }
+
+ [[nodiscard]] bool ClipDistances() const noexcept {
+ return AnyComponent(IR::Attribute::ClipDistance0) ||
+ AnyComponent(IR::Attribute::ClipDistance4);
+ }
+
+ [[nodiscard]] bool Legacy() const noexcept {
+ return AnyComponent(IR::Attribute::ColorFrontDiffuseR) ||
+ AnyComponent(IR::Attribute::ColorFrontSpecularR) ||
+ AnyComponent(IR::Attribute::ColorBackDiffuseR) ||
+ AnyComponent(IR::Attribute::ColorBackSpecularR) || FixedFunctionTexture();
+ }
+
+ [[nodiscard]] bool FixedFunctionTexture() const noexcept {
+ for (size_t index = 0; index < 10; ++index) {
+ if (AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) {
+ return true;
+ }
+ }
+ return false;
+ }
+};
+
+} // namespace Shader